struct nvkm_device *device = gr->base.engine.subdev.device;
const u8 gpcmax = nvkm_rd32(device, 0x022430);
const u8 tpcmax = nvkm_rd32(device, 0x022434) * gpcmax;
- u8 tpcnr[GPC_MAX], data[TPC_MAX];
- int gpc, tpc, i;
-
- memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
- memset(data, 0x1f, sizeof(data));
-
- gpc = -1;
- for (tpc = 0; tpc < gr->tpc_total; tpc++) {
- do {
- gpc = (gpc + 1) % gr->gpc_nr;
- } while (!tpcnr[gpc]);
- tpcnr[gpc]--;
- data[tpc] = gpc;
- }
+ int i, j, sm = 0;
+ u32 data;
- for (i = 0; i < DIV_ROUND_UP(tpcmax, 4); i++)
- nvkm_wr32(device, 0x4060a8 + (i * 4), ((u32 *)data)[i]);
+ for (i = 0; i < DIV_ROUND_UP(tpcmax, 4); i++) {
+ for (data = 0, j = 0; j < 4; j++) {
+ if (sm < gr->sm_nr)
+ data |= gr->sm[sm++].gpc << (j * 8);
+ else
+ data |= 0x1f << (j * 8);
+ }
+ nvkm_wr32(device, 0x4060a8 + (i * 4), data);
+ }
}
void
{
struct nvkm_device *device = gr->base.engine.subdev.device;
const struct gf100_grctx_func *func = gr->func->grctx;
- int tpc, gpc, sm, i, j;
+ int gpc, sm, i, j;
u32 data;
- for (tpc = 0, sm = 0; tpc < gr->tpc_max; tpc++) {
- for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
- if (tpc < gr->tpc_nr[gpc])
- func->sm_id(gr, gpc, tpc, sm++);
- if (func->tpc_nr)
- func->tpc_nr(gr, gpc);
- }
+ for (sm = 0; sm < gr->sm_nr; sm++) {
+ func->sm_id(gr, gr->sm[sm].gpc, gr->sm[sm].tpc, sm);
+ if (func->tpc_nr)
+ func->tpc_nr(gr, gr->sm[sm].gpc);
}
for (gpc = 0, i = 0; i < 4; i++) {
const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4);
u32 dist[TPC_MAX / 4] = {};
u32 gpcs[GPC_MAX] = {};
- u8 tpcnr[GPC_MAX];
- int tpc, gpc, i;
+ u8 sm, i;
- memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
-
- /* won't result in the same distribution as the binary driver where
- * some of the gpcs have more tpcs than others, but this shall do
- * for the moment. the code for earlier gpus has this issue too.
- */
- for (gpc = -1, i = 0; i < gr->tpc_total; i++) {
- do {
- gpc = (gpc + 1) % gr->gpc_nr;
- } while(!tpcnr[gpc]);
- tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
-
- dist[i / 4] |= ((gpc << 4) | tpc) << ((i % 4) * 8);
- gpcs[gpc] |= i << (tpc * 8);
+ for (sm = 0; sm < gr->sm_nr; sm++) {
+ const u8 gpc = gr->sm[sm].gpc;
+ const u8 tpc = gr->sm[sm].tpc;
+ dist[sm / 4] |= ((gpc << 4) | tpc) << ((sm % 4) * 8);
+ gpcs[gpc] |= sm << (tpc * 8);
}
for (i = 0; i < dist_nr; i++)
struct nvkm_device *device = gr->base.engine.subdev.device;
const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4);
u32 dist[TPC_MAX / 4] = {}, gpcs[16] = {};
- u8 tpcnr[GPC_MAX];
- int tpc, gpc, i;
+ u8 sm, i;
- memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
-
- /* won't result in the same distribution as the binary driver where
- * some of the gpcs have more tpcs than others, but this shall do
- * for the moment. the code for earlier gpus has this issue too.
- */
- for (gpc = -1, i = 0; i < gr->tpc_total; i++) {
- do {
- gpc = (gpc + 1) % gr->gpc_nr;
- } while(!tpcnr[gpc]);
- tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
-
- dist[i / 4] |= ((gpc << 4) | tpc) << ((i % 4) * 8);
- gpcs[gpc + (gr->func->gpc_nr * (tpc / 4))] |= i << (tpc * 8);
+ for (sm = 0; sm < gr->sm_nr; sm++) {
+ const u8 gpc = gr->sm[sm].gpc;
+ const u8 tpc = gr->sm[sm].tpc;
+ dist[sm / 4] |= ((gpc << 4) | tpc) << ((sm % 4) * 8);
+ gpcs[gpc + (gr->func->gpc_nr * (tpc / 4))] |= sm << ((tpc % 4) * 8);
}
for (i = 0; i < dist_nr; i++)
return ret;
}
+void
+gf100_gr_oneinit_sm_id(struct gf100_gr *gr)
+{ /* Build the gr->sm[] table: append one (gpc, tpc) entry for every gpc
+   * below gr->gpc_nr and every tpc index below gr->tpc_nr[gpc], counting
+   * the entries in gr->sm_nr. The tpc index is the outer loop, so
+   * consecutive entries step through the GPCs before moving on to the
+   * next tpc index.
+   * NOTE(review): gr->sm_nr is only incremented here, never reset, and
+   * the writes to gr->sm[] are not bounds-checked; presumably this runs
+   * once on a zero-initialised gr - confirm against the caller.
+   */
+ int tpc, gpc;
+ for (tpc = 0; tpc < gr->tpc_max; tpc++) {
+ for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+ if (tpc < gr->tpc_nr[gpc]) { /* skip GPCs whose tpc_nr is too small for this index */
+ gr->sm[gr->sm_nr].gpc = gpc;
+ gr->sm[gr->sm_nr].tpc = tpc;
+ gr->sm_nr++; /* doubles as next free slot and running entry count */
+ }
+ }
+ }
+}
+
void
gf100_gr_oneinit_tiles(struct gf100_gr *gr)
{
memset(gr->tile, 0xff, sizeof(gr->tile));
gr->func->oneinit_tiles(gr);
+ gr->func->oneinit_sm_id(gr);
return 0;
}
static const struct gf100_gr_func
gf100_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
+ .oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
u8 screen_tile_row_offset;
u8 tile[TPC_MAX];
+ struct { /* (gpc, tpc) pair per entry; the array index is what gets passed on as the SM id */
+ u8 gpc; /* GPC index of this entry */
+ u8 tpc; /* TPC index of this entry, below tpc_nr[gpc] */
+ } sm[TPC_MAX];
+ u8 sm_nr; /* number of entries of sm[] that have been filled in */
+
struct gf100_gr_data mmio_data[4];
struct gf100_gr_mmio mmio_list[4096/8];
u32 size;
struct gf100_gr_func {
void (*dtor)(struct gf100_gr *);
void (*oneinit_tiles)(struct gf100_gr *);
+ void (*oneinit_sm_id)(struct gf100_gr *);
int (*init)(struct gf100_gr *);
void (*init_gpc_mmu)(struct gf100_gr *);
void (*init_r405a14)(struct gf100_gr *);
int gf100_gr_rops(struct gf100_gr *);
void gf100_gr_oneinit_tiles(struct gf100_gr *);
+void gf100_gr_oneinit_sm_id(struct gf100_gr *);
int gf100_gr_init(struct gf100_gr *);
void gf100_gr_init_vsc_stream_master(struct gf100_gr *);
void gf100_gr_init_zcull(struct gf100_gr *);
int gk20a_gr_init(struct gf100_gr *);
void gm200_gr_oneinit_tiles(struct gf100_gr *);
+void gm200_gr_oneinit_sm_id(struct gf100_gr *);
int gm200_gr_rops(struct gf100_gr *);
void gm200_gr_init_num_active_ltcs(struct gf100_gr *);
void gm200_gr_init_ds_hww_esr_2(struct gf100_gr *);
static const struct gf100_gr_func
gf104_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
+ .oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
static const struct gf100_gr_func
gf108_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
+ .oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_r405a14 = gf108_gr_init_r405a14,
static const struct gf100_gr_func
gf110_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
+ .oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
static const struct gf100_gr_func
gf117_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
+ .oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
static const struct gf100_gr_func
gf119_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
+ .oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
static const struct gf100_gr_func
gk104_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
+ .oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
static const struct gf100_gr_func
gk110_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
+ .oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
static const struct gf100_gr_func
gk110b_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
+ .oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
static const struct gf100_gr_func
gk208_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
+ .oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
static const struct gf100_gr_func
gk20a_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
+ .oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gk20a_gr_init,
.init_zcull = gf117_gr_init_zcull,
.init_rop_active_fbps = gk104_gr_init_rop_active_fbps,
static const struct gf100_gr_func
gm107_gr = {
.oneinit_tiles = gf100_gr_oneinit_tiles,
+ .oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gm107_gr_init_gpc_mmu,
.init_bios = gm107_gr_init_bios,
0, 1, 1, 0, 0, 1, 1, 0,
};
+void
+gm200_gr_oneinit_sm_id(struct gf100_gr *gr)
+{
+ /* XXX: a different SM id assignment algorithm is needed here, but it
+  * has not been figured out yet. Until it is, delegate to
+  * gf100_gr_oneinit_sm_id() unchanged.
+  */
+ gf100_gr_oneinit_sm_id(gr);
+}
+
void
gm200_gr_oneinit_tiles(struct gf100_gr *gr)
{
static const struct gf100_gr_func
gm200_gr = {
.oneinit_tiles = gm200_gr_oneinit_tiles,
+ .oneinit_sm_id = gm200_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_bios = gm107_gr_init_bios,
static const struct gf100_gr_func
gm20b_gr = {
.oneinit_tiles = gm200_gr_oneinit_tiles,
+ .oneinit_sm_id = gm200_gr_oneinit_sm_id,
.init = gk20a_gr_init,
.init_zcull = gf117_gr_init_zcull,
.init_gpc_mmu = gm20b_gr_init_gpc_mmu,
static const struct gf100_gr_func
gp100_gr = {
.oneinit_tiles = gm200_gr_oneinit_tiles,
+ .oneinit_sm_id = gm200_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
static const struct gf100_gr_func
gp102_gr = {
.oneinit_tiles = gm200_gr_oneinit_tiles,
+ .oneinit_sm_id = gm200_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
static const struct gf100_gr_func
gp104_gr = {
.oneinit_tiles = gm200_gr_oneinit_tiles,
+ .oneinit_sm_id = gm200_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
static const struct gf100_gr_func
gp107_gr = {
.oneinit_tiles = gm200_gr_oneinit_tiles,
+ .oneinit_sm_id = gm200_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
static const struct gf100_gr_func
gp10b_gr = {
.oneinit_tiles = gm200_gr_oneinit_tiles,
+ .oneinit_sm_id = gm200_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,