drm/amdkfd: Fix race between scheduler and context restore
author Jay Cornwall <Jay.Cornwall@amd.com>
Thu, 12 Jul 2018 02:32:46 +0000 (22:32 -0400)
committer Oded Gabbay <oded.gabbay@gmail.com>
Thu, 12 Jul 2018 02:32:46 +0000 (22:32 -0400)
The scheduler may raise SQ_WAVE_STATUS.SPI_PRIO via SQ_CMD before
context restore has completed. Restoring SPI_PRIO=0 after this point
may cause context save to fail as the lower priority wavefronts
are not selected for execution among spin-waiting wavefronts.

Leave SPI_PRIO at its SPI-initialized or scheduler-raised value.

v2: Also fix race with exception handler

Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm

index f68aef02fc1fc116f4ec64f953a8b0da3d46e740..3621efbd57595df861e9d2088f6fd2e02c596eef 100644 (file)
  */
 
 static const uint32_t cwsr_trap_gfx8_hex[] = {
-       0xbf820001, 0xbf820125,
+       0xbf820001, 0xbf82012b,
        0xb8f4f802, 0x89748674,
        0xb8f5f803, 0x8675ff75,
-       0x00000400, 0xbf850011,
+       0x00000400, 0xbf850017,
        0xc00a1e37, 0x00000000,
        0xbf8c007f, 0x87777978,
-       0xbf840002, 0xb974f802,
-       0xbe801d78, 0xb8f5f803,
-       0x8675ff75, 0x000001ff,
-       0xbf850002, 0x80708470,
-       0x82718071, 0x8671ff71,
-       0x0000ffff, 0xb974f802,
+       0xbf840005, 0x8f728374,
+       0xb972e0c2, 0xbf800002,
+       0xb9740002, 0xbe801d78,
+       0xb8f5f803, 0x8675ff75,
+       0x000001ff, 0xbf850002,
+       0x80708470, 0x82718071,
+       0x8671ff71, 0x0000ffff,
+       0x8f728374, 0xb972e0c2,
+       0xbf800002, 0xb9740002,
        0xbe801f70, 0xb8f5f803,
        0x8675ff75, 0x00000100,
        0xbf840006, 0xbefa0080,
@@ -168,7 +171,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
        0x807c847c, 0x806eff6e,
        0x00000400, 0xbf0a757c,
        0xbf85ffef, 0xbf9c0000,
-       0xbf8200ca, 0xbef8007e,
+       0xbf8200cd, 0xbef8007e,
        0x8679ff7f, 0x0000ffff,
        0x8779ff79, 0x00040000,
        0xbefa0080, 0xbefb00ff,
@@ -268,16 +271,18 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
        0x8f739773, 0xb976f807,
        0x8671ff71, 0x0000ffff,
        0x86fe7e7e, 0x86ea6a6a,
-       0xb974f802, 0xbf8a0000,
-       0x95807370, 0xbf810000,
+       0x8f768374, 0xb976e0c2,
+       0xbf800002, 0xb9740002,
+       0xbf8a0000, 0x95807370,
+       0xbf810000, 0x00000000,
 };
 
 
 static const uint32_t cwsr_trap_gfx9_hex[] = {
-       0xbf820001, 0xbf82015a,
+       0xbf820001, 0xbf82015d,
        0xb8f8f802, 0x89788678,
        0xb8f1f803, 0x866eff71,
-       0x00000400, 0xbf850034,
+       0x00000400, 0xbf850037,
        0x866eff71, 0x00000800,
        0xbf850003, 0x866eff71,
        0x00000100, 0xbf840008,
@@ -303,258 +308,261 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
        0x8f6e8b77, 0x866eff6e,
        0x001f8000, 0xb96ef807,
        0x86fe7e7e, 0x86ea6a6a,
-       0xb978f802, 0xbe801f6c,
-       0x866dff6d, 0x0000ffff,
-       0xbef00080, 0xb9700283,
-       0xb8f02407, 0x8e709c70,
-       0x876d706d, 0xb8f003c7,
-       0x8e709b70, 0x876d706d,
-       0xb8f0f807, 0x8670ff70,
-       0x00007fff, 0xb970f807,
-       0xbeee007e, 0xbeef007f,
-       0xbefe0180, 0xbf900004,
-       0x87708478, 0xb970f802,
-       0xbf8e0002, 0xbf88fffe,
-       0xb8f02a05, 0x80708170,
-       0x8e708a70, 0xb8f11605,
-       0x80718171, 0x8e718671,
-       0x80707170, 0x80707e70,
-       0x8271807f, 0x8671ff71,
-       0x0000ffff, 0xc0471cb8,
-       0x00000040, 0xbf8cc07f,
-       0xc04b1d38, 0x00000048,
-       0xbf8cc07f, 0xc0431e78,
-       0x00000058, 0xbf8cc07f,
-       0xc0471eb8, 0x0000005c,
-       0xbf8cc07f, 0xbef4007e,
-       0x8675ff7f, 0x0000ffff,
-       0x8775ff75, 0x00040000,
-       0xbef60080, 0xbef700ff,
-       0x00807fac, 0x8670ff7f,
-       0x08000000, 0x8f708370,
-       0x87777077, 0x8670ff7f,
-       0x70000000, 0x8f708170,
-       0x87777077, 0xbefb007c,
-       0xbefa0080, 0xb8fa2a05,
-       0x807a817a, 0x8e7a8a7a,
-       0xb8f01605, 0x80708170,
-       0x8e708670, 0x807a707a,
-       0xbef60084, 0xbef600ff,
-       0x01000000, 0xbefe007c,
-       0xbefc007a, 0xc0611efa,
-       0x0000007c, 0xbf8cc07f,
-       0x807a847a, 0xbefc007e,
+       0x8f6e8378, 0xb96ee0c2,
+       0xbf800002, 0xb9780002,
+       0xbe801f6c, 0x866dff6d,
+       0x0000ffff, 0xbef00080,
+       0xb9700283, 0xb8f02407,
+       0x8e709c70, 0x876d706d,
+       0xb8f003c7, 0x8e709b70,
+       0x876d706d, 0xb8f0f807,
+       0x8670ff70, 0x00007fff,
+       0xb970f807, 0xbeee007e,
+       0xbeef007f, 0xbefe0180,
+       0xbf900004, 0x87708478,
+       0xb970f802, 0xbf8e0002,
+       0xbf88fffe, 0xb8f02a05,
+       0x80708170, 0x8e708a70,
+       0xb8f11605, 0x80718171,
+       0x8e718671, 0x80707170,
+       0x80707e70, 0x8271807f,
+       0x8671ff71, 0x0000ffff,
+       0xc0471cb8, 0x00000040,
+       0xbf8cc07f, 0xc04b1d38,
+       0x00000048, 0xbf8cc07f,
+       0xc0431e78, 0x00000058,
+       0xbf8cc07f, 0xc0471eb8,
+       0x0000005c, 0xbf8cc07f,
+       0xbef4007e, 0x8675ff7f,
+       0x0000ffff, 0x8775ff75,
+       0x00040000, 0xbef60080,
+       0xbef700ff, 0x00807fac,
+       0x8670ff7f, 0x08000000,
+       0x8f708370, 0x87777077,
+       0x8670ff7f, 0x70000000,
+       0x8f708170, 0x87777077,
+       0xbefb007c, 0xbefa0080,
+       0xb8fa2a05, 0x807a817a,
+       0x8e7a8a7a, 0xb8f01605,
+       0x80708170, 0x8e708670,
+       0x807a707a, 0xbef60084,
+       0xbef600ff, 0x01000000,
        0xbefe007c, 0xbefc007a,
-       0xc0611b3a, 0x0000007c,
+       0xc0611efa, 0x0000007c,
        0xbf8cc07f, 0x807a847a,
        0xbefc007e, 0xbefe007c,
-       0xbefc007a, 0xc0611b7a,
+       0xbefc007a, 0xc0611b3a,
        0x0000007c, 0xbf8cc07f,
        0x807a847a, 0xbefc007e,
        0xbefe007c, 0xbefc007a,
-       0xc0611bba, 0x0000007c,
+       0xc0611b7a, 0x0000007c,
        0xbf8cc07f, 0x807a847a,
        0xbefc007e, 0xbefe007c,
-       0xbefc007a, 0xc0611bfa,
+       0xbefc007a, 0xc0611bba,
        0x0000007c, 0xbf8cc07f,
        0x807a847a, 0xbefc007e,
        0xbefe007c, 0xbefc007a,
-       0xc0611e3a, 0x0000007c,
-       0xbf8cc07f, 0x807a847a,
-       0xbefc007e, 0xb8f1f803,
-       0xbefe007c, 0xbefc007a,
-       0xc0611c7a, 0x0000007c,
+       0xc0611bfa, 0x0000007c,
        0xbf8cc07f, 0x807a847a,
        0xbefc007e, 0xbefe007c,
-       0xbefc007a, 0xc0611a3a,
+       0xbefc007a, 0xc0611e3a,
+       0x0000007c, 0xbf8cc07f,
+       0x807a847a, 0xbefc007e,
+       0xb8f1f803, 0xbefe007c,
+       0xbefc007a, 0xc0611c7a,
        0x0000007c, 0xbf8cc07f,
        0x807a847a, 0xbefc007e,
        0xbefe007c, 0xbefc007a,
-       0xc0611a7a, 0x0000007c,
-       0xbf8cc07f, 0x807a847a,
-       0xbefc007e, 0xb8fbf801,
-       0xbefe007c, 0xbefc007a,
-       0xc0611efa, 0x0000007c,
+       0xc0611a3a, 0x0000007c,
        0xbf8cc07f, 0x807a847a,
-       0xbefc007e, 0x8670ff7f,
-       0x04000000, 0xbeef0080,
-       0x876f6f70, 0xb8fa2a05,
+       0xbefc007e, 0xbefe007c,
+       0xbefc007a, 0xc0611a7a,
+       0x0000007c, 0xbf8cc07f,
+       0x807a847a, 0xbefc007e,
+       0xb8fbf801, 0xbefe007c,
+       0xbefc007a, 0xc0611efa,
+       0x0000007c, 0xbf8cc07f,
+       0x807a847a, 0xbefc007e,
+       0x8670ff7f, 0x04000000,
+       0xbeef0080, 0x876f6f70,
+       0xb8fa2a05, 0x807a817a,
+       0x8e7a8a7a, 0xb8f11605,
+       0x80718171, 0x8e718471,
+       0x8e768271, 0xbef600ff,
+       0x01000000, 0xbef20174,
+       0x80747a74, 0x82758075,
+       0xbefc0080, 0xbf800000,
+       0xbe802b00, 0xbe822b02,
+       0xbe842b04, 0xbe862b06,
+       0xbe882b08, 0xbe8a2b0a,
+       0xbe8c2b0c, 0xbe8e2b0e,
+       0xc06b003a, 0x00000000,
+       0xbf8cc07f, 0xc06b013a,
+       0x00000010, 0xbf8cc07f,
+       0xc06b023a, 0x00000020,
+       0xbf8cc07f, 0xc06b033a,
+       0x00000030, 0xbf8cc07f,
+       0x8074c074, 0x82758075,
+       0x807c907c, 0xbf0a717c,
+       0xbf85ffe7, 0xbef40172,
+       0xbefa0080, 0xbefe00c1,
+       0xbeff00c1, 0xbee80080,
+       0xbee90080, 0xbef600ff,
+       0x01000000, 0xe0724000,
+       0x7a1d0000, 0xe0724100,
+       0x7a1d0100, 0xe0724200,
+       0x7a1d0200, 0xe0724300,
+       0x7a1d0300, 0xbefe00c1,
+       0xbeff00c1, 0xb8f14306,
+       0x8671c171, 0xbf84002c,
+       0xbf8a0000, 0x8670ff6f,
+       0x04000000, 0xbf840028,
+       0x8e718671, 0x8e718271,
+       0xbef60071, 0xb8fa2a05,
        0x807a817a, 0x8e7a8a7a,
-       0xb8f11605, 0x80718171,
-       0x8e718471, 0x8e768271,
+       0xb8f01605, 0x80708170,
+       0x8e708670, 0x807a707a,
+       0x807aff7a, 0x00000080,
        0xbef600ff, 0x01000000,
-       0xbef20174, 0x80747a74,
-       0x82758075, 0xbefc0080,
-       0xbf800000, 0xbe802b00,
-       0xbe822b02, 0xbe842b04,
-       0xbe862b06, 0xbe882b08,
-       0xbe8a2b0a, 0xbe8c2b0c,
-       0xbe8e2b0e, 0xc06b003a,
-       0x00000000, 0xbf8cc07f,
-       0xc06b013a, 0x00000010,
-       0xbf8cc07f, 0xc06b023a,
-       0x00000020, 0xbf8cc07f,
-       0xc06b033a, 0x00000030,
-       0xbf8cc07f, 0x8074c074,
-       0x82758075, 0x807c907c,
-       0xbf0a717c, 0xbf85ffe7,
-       0xbef40172, 0xbefa0080,
+       0xbefc0080, 0xd28c0002,
+       0x000100c1, 0xd28d0003,
+       0x000204c1, 0xd1060002,
+       0x00011103, 0x7e0602ff,
+       0x00000200, 0xbefc00ff,
+       0x00010000, 0xbe800077,
+       0x8677ff77, 0xff7fffff,
+       0x8777ff77, 0x00058000,
+       0xd8ec0000, 0x00000002,
+       0xbf8cc07f, 0xe0765000,
+       0x7a1d0002, 0x68040702,
+       0xd0c9006a, 0x0000e302,
+       0xbf87fff7, 0xbef70000,
+       0xbefa00ff, 0x00000400,
        0xbefe00c1, 0xbeff00c1,
-       0xbee80080, 0xbee90080,
+       0xb8f12a05, 0x80718171,
+       0x8e718271, 0x8e768871,
        0xbef600ff, 0x01000000,
+       0xbefc0084, 0xbf0a717c,
+       0xbf840015, 0xbf11017c,
+       0x8071ff71, 0x00001000,
+       0x7e000300, 0x7e020301,
+       0x7e040302, 0x7e060303,
        0xe0724000, 0x7a1d0000,
        0xe0724100, 0x7a1d0100,
        0xe0724200, 0x7a1d0200,
        0xe0724300, 0x7a1d0300,
+       0x807c847c, 0x807aff7a,
+       0x00000400, 0xbf0a717c,
+       0xbf85ffef, 0xbf9c0000,
+       0xbf8200dc, 0xbef4007e,
+       0x8675ff7f, 0x0000ffff,
+       0x8775ff75, 0x00040000,
+       0xbef60080, 0xbef700ff,
+       0x00807fac, 0x866eff7f,
+       0x08000000, 0x8f6e836e,
+       0x87776e77, 0x866eff7f,
+       0x70000000, 0x8f6e816e,
+       0x87776e77, 0x866eff7f,
+       0x04000000, 0xbf84001e,
        0xbefe00c1, 0xbeff00c1,
-       0xb8f14306, 0x8671c171,
-       0xbf84002c, 0xbf8a0000,
-       0x8670ff6f, 0x04000000,
-       0xbf840028, 0x8e718671,
-       0x8e718271, 0xbef60071,
-       0xb8fa2a05, 0x807a817a,
-       0x8e7a8a7a, 0xb8f01605,
-       0x80708170, 0x8e708670,
-       0x807a707a, 0x807aff7a,
+       0xb8ef4306, 0x866fc16f,
+       0xbf840019, 0x8e6f866f,
+       0x8e6f826f, 0xbef6006f,
+       0xb8f82a05, 0x80788178,
+       0x8e788a78, 0xb8ee1605,
+       0x806e816e, 0x8e6e866e,
+       0x80786e78, 0x8078ff78,
        0x00000080, 0xbef600ff,
        0x01000000, 0xbefc0080,
-       0xd28c0002, 0x000100c1,
-       0xd28d0003, 0x000204c1,
-       0xd1060002, 0x00011103,
-       0x7e0602ff, 0x00000200,
-       0xbefc00ff, 0x00010000,
-       0xbe800077, 0x8677ff77,
-       0xff7fffff, 0x8777ff77,
-       0x00058000, 0xd8ec0000,
-       0x00000002, 0xbf8cc07f,
-       0xe0765000, 0x7a1d0002,
-       0x68040702, 0xd0c9006a,
-       0x0000e302, 0xbf87fff7,
-       0xbef70000, 0xbefa00ff,
-       0x00000400, 0xbefe00c1,
-       0xbeff00c1, 0xb8f12a05,
-       0x80718171, 0x8e718271,
-       0x8e768871, 0xbef600ff,
-       0x01000000, 0xbefc0084,
-       0xbf0a717c, 0xbf840015,
-       0xbf11017c, 0x8071ff71,
-       0x00001000, 0x7e000300,
+       0xe0510000, 0x781d0000,
+       0xe0510100, 0x781d0000,
+       0x807cff7c, 0x00000200,
+       0x8078ff78, 0x00000200,
+       0xbf0a6f7c, 0xbf85fff6,
+       0xbef80080, 0xbefe00c1,
+       0xbeff00c1, 0xb8ef2a05,
+       0x806f816f, 0x8e6f826f,
+       0x8e76886f, 0xbef600ff,
+       0x01000000, 0xbeee0078,
+       0x8078ff78, 0x00000400,
+       0xbefc0084, 0xbf11087c,
+       0x806fff6f, 0x00008000,
+       0xe0524000, 0x781d0000,
+       0xe0524100, 0x781d0100,
+       0xe0524200, 0x781d0200,
+       0xe0524300, 0x781d0300,
+       0xbf8c0f70, 0x7e000300,
        0x7e020301, 0x7e040302,
-       0x7e060303, 0xe0724000,
-       0x7a1d0000, 0xe0724100,
-       0x7a1d0100, 0xe0724200,
-       0x7a1d0200, 0xe0724300,
-       0x7a1d0300, 0x807c847c,
-       0x807aff7a, 0x00000400,
-       0xbf0a717c, 0xbf85ffef,
-       0xbf9c0000, 0xbf8200d9,
-       0xbef4007e, 0x8675ff7f,
-       0x0000ffff, 0x8775ff75,
-       0x00040000, 0xbef60080,
-       0xbef700ff, 0x00807fac,
-       0x866eff7f, 0x08000000,
-       0x8f6e836e, 0x87776e77,
-       0x866eff7f, 0x70000000,
-       0x8f6e816e, 0x87776e77,
-       0x866eff7f, 0x04000000,
-       0xbf84001e, 0xbefe00c1,
-       0xbeff00c1, 0xb8ef4306,
-       0x866fc16f, 0xbf840019,
-       0x8e6f866f, 0x8e6f826f,
-       0xbef6006f, 0xb8f82a05,
+       0x7e060303, 0x807c847c,
+       0x8078ff78, 0x00000400,
+       0xbf0a6f7c, 0xbf85ffee,
+       0xbf9c0000, 0xe0524000,
+       0x6e1d0000, 0xe0524100,
+       0x6e1d0100, 0xe0524200,
+       0x6e1d0200, 0xe0524300,
+       0x6e1d0300, 0xb8f82a05,
        0x80788178, 0x8e788a78,
        0xb8ee1605, 0x806e816e,
        0x8e6e866e, 0x80786e78,
-       0x8078ff78, 0x00000080,
-       0xbef600ff, 0x01000000,
-       0xbefc0080, 0xe0510000,
-       0x781d0000, 0xe0510100,
-       0x781d0000, 0x807cff7c,
-       0x00000200, 0x8078ff78,
-       0x00000200, 0xbf0a6f7c,
-       0xbf85fff6, 0xbef80080,
-       0xbefe00c1, 0xbeff00c1,
-       0xb8ef2a05, 0x806f816f,
-       0x8e6f826f, 0x8e76886f,
-       0xbef600ff, 0x01000000,
-       0xbeee0078, 0x8078ff78,
-       0x00000400, 0xbefc0084,
-       0xbf11087c, 0x806fff6f,
-       0x00008000, 0xe0524000,
-       0x781d0000, 0xe0524100,
-       0x781d0100, 0xe0524200,
-       0x781d0200, 0xe0524300,
-       0x781d0300, 0xbf8c0f70,
-       0x7e000300, 0x7e020301,
-       0x7e040302, 0x7e060303,
-       0x807c847c, 0x8078ff78,
-       0x00000400, 0xbf0a6f7c,
-       0xbf85ffee, 0xbf9c0000,
-       0xe0524000, 0x6e1d0000,
-       0xe0524100, 0x6e1d0100,
-       0xe0524200, 0x6e1d0200,
-       0xe0524300, 0x6e1d0300,
+       0x80f8c078, 0xb8ef1605,
+       0x806f816f, 0x8e6f846f,
+       0x8e76826f, 0xbef600ff,
+       0x01000000, 0xbefc006f,
+       0xc031003a, 0x00000078,
+       0x80f8c078, 0xbf8cc07f,
+       0x80fc907c, 0xbf800000,
+       0xbe802d00, 0xbe822d02,
+       0xbe842d04, 0xbe862d06,
+       0xbe882d08, 0xbe8a2d0a,
+       0xbe8c2d0c, 0xbe8e2d0e,
+       0xbf06807c, 0xbf84fff0,
        0xb8f82a05, 0x80788178,
        0x8e788a78, 0xb8ee1605,
        0x806e816e, 0x8e6e866e,
-       0x80786e78, 0x80f8c078,
-       0xb8ef1605, 0x806f816f,
-       0x8e6f846f, 0x8e76826f,
+       0x80786e78, 0xbef60084,
        0xbef600ff, 0x01000000,
-       0xbefc006f, 0xc031003a,
-       0x00000078, 0x80f8c078,
-       0xbf8cc07f, 0x80fc907c,
-       0xbf800000, 0xbe802d00,
-       0xbe822d02, 0xbe842d04,
-       0xbe862d06, 0xbe882d08,
-       0xbe8a2d0a, 0xbe8c2d0c,
-       0xbe8e2d0e, 0xbf06807c,
-       0xbf84fff0, 0xb8f82a05,
-       0x80788178, 0x8e788a78,
-       0xb8ee1605, 0x806e816e,
-       0x8e6e866e, 0x80786e78,
-       0xbef60084, 0xbef600ff,
-       0x01000000, 0xc0211bfa,
+       0xc0211bfa, 0x00000078,
+       0x80788478, 0xc0211b3a,
        0x00000078, 0x80788478,
-       0xc0211b3a, 0x00000078,
-       0x80788478, 0xc0211b7a,
+       0xc0211b7a, 0x00000078,
+       0x80788478, 0xc0211eba,
        0x00000078, 0x80788478,
-       0xc0211eba, 0x00000078,
-       0x80788478, 0xc0211efa,
+       0xc0211efa, 0x00000078,
+       0x80788478, 0xc0211c3a,
        0x00000078, 0x80788478,
-       0xc0211c3a, 0x00000078,
-       0x80788478, 0xc0211c7a,
+       0xc0211c7a, 0x00000078,
+       0x80788478, 0xc0211a3a,
        0x00000078, 0x80788478,
-       0xc0211a3a, 0x00000078,
-       0x80788478, 0xc0211a7a,
+       0xc0211a7a, 0x00000078,
+       0x80788478, 0xc0211cfa,
        0x00000078, 0x80788478,
-       0xc0211cfa, 0x00000078,
-       0x80788478, 0xbf8cc07f,
-       0xbefc006f, 0xbefe007a,
-       0xbeff007b, 0x866f71ff,
-       0x000003ff, 0xb96f4803,
-       0x866f71ff, 0xfffff800,
-       0x8f6f8b6f, 0xb96fa2c3,
-       0xb973f801, 0xb8ee2a05,
-       0x806e816e, 0x8e6e8a6e,
-       0xb8ef1605, 0x806f816f,
-       0x8e6f866f, 0x806e6f6e,
-       0x806e746e, 0x826f8075,
-       0x866fff6f, 0x0000ffff,
-       0xc0071cb7, 0x00000040,
-       0xc00b1d37, 0x00000048,
-       0xc0031e77, 0x00000058,
-       0xc0071eb7, 0x0000005c,
-       0xbf8cc07f, 0x866fff6d,
-       0xf0000000, 0x8f6f9c6f,
-       0x8e6f906f, 0xbeee0080,
-       0x876e6f6e, 0x866fff6d,
-       0x08000000, 0x8f6f9b6f,
-       0x8e6f8f6f, 0x876e6f6e,
-       0x866fff70, 0x00800000,
-       0x8f6f976f, 0xb96ef807,
-       0x866dff6d, 0x0000ffff,
-       0x86fe7e7e, 0x86ea6a6a,
-       0xb970f802, 0xbf8a0000,
+       0xbf8cc07f, 0xbefc006f,
+       0xbefe007a, 0xbeff007b,
+       0x866f71ff, 0x000003ff,
+       0xb96f4803, 0x866f71ff,
+       0xfffff800, 0x8f6f8b6f,
+       0xb96fa2c3, 0xb973f801,
+       0xb8ee2a05, 0x806e816e,
+       0x8e6e8a6e, 0xb8ef1605,
+       0x806f816f, 0x8e6f866f,
+       0x806e6f6e, 0x806e746e,
+       0x826f8075, 0x866fff6f,
+       0x0000ffff, 0xc0071cb7,
+       0x00000040, 0xc00b1d37,
+       0x00000048, 0xc0031e77,
+       0x00000058, 0xc0071eb7,
+       0x0000005c, 0xbf8cc07f,
+       0x866fff6d, 0xf0000000,
+       0x8f6f9c6f, 0x8e6f906f,
+       0xbeee0080, 0x876e6f6e,
+       0x866fff6d, 0x08000000,
+       0x8f6f9b6f, 0x8e6f8f6f,
+       0x876e6f6e, 0x866fff70,
+       0x00800000, 0x8f6f976f,
+       0xb96ef807, 0x866dff6d,
+       0x0000ffff, 0x86fe7e7e,
+       0x86ea6a6a, 0x8f6e8370,
+       0xb96ee0c2, 0xbf800002,
+       0xb9700002, 0xbf8a0000,
        0x95806f6c, 0xbf810000,
 };
index a2a04bb64096f0ca6801fbef0e28aa931e7caa0b..abe1a5da29fb313b7ae03777bec24ddf8082a40c 100644 (file)
@@ -103,6 +103,10 @@ var SQ_WAVE_STATUS_INST_ATC_SHIFT  = 23
 var SQ_WAVE_STATUS_INST_ATC_MASK   = 0x00800000
 var SQ_WAVE_STATUS_SPI_PRIO_SHIFT  = 1
 var SQ_WAVE_STATUS_SPI_PRIO_MASK   = 0x00000006
+var SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT   = 0
+var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE    = 1
+var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT  = 3
+var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE   = 29
 
 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT    = 12
 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE     = 9
@@ -251,7 +255,7 @@ if (!EMU_RUN_HACK)
     s_waitcnt lgkmcnt(0)
     s_or_b32        ttmp7, ttmp8, ttmp9
     s_cbranch_scc0  L_NO_NEXT_TRAP //next level trap handler not been set
-    s_setreg_b32    hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC)
+    set_status_without_spi_prio(s_save_status, ttmp2) //restore HW status(SCC)
     s_setpc_b64     [ttmp8,ttmp9] //jump to next level trap handler
 
 L_NO_NEXT_TRAP:
@@ -262,7 +266,7 @@ L_NO_NEXT_TRAP:
     s_addc_u32  ttmp1, ttmp1, 0
 L_EXCP_CASE:
     s_and_b32   ttmp1, ttmp1, 0xFFFF
-    s_setreg_b32    hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC)
+    set_status_without_spi_prio(s_save_status, ttmp2) //restore HW status(SCC)
     s_rfe_b64       [ttmp0, ttmp1]
 end
     // *********        End handling of non-CWSR traps   *******************
@@ -1053,7 +1057,7 @@ end
     s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff      //pc[47:32]        //Do it here in order not to affect STATUS
     s_and_b64    exec, exec, exec  // Restore STATUS.EXECZ, not writable by s_setreg_b32
     s_and_b64    vcc, vcc, vcc  // Restore STATUS.VCCZ, not writable by s_setreg_b32
-    s_setreg_b32    hwreg(HW_REG_STATUS),   s_restore_status     // SCC is included, which is changed by previous salu
+    set_status_without_spi_prio(s_restore_status, s_restore_tmp) // SCC is included, which is changed by previous salu
 
     s_barrier                                                   //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time
 
@@ -1134,3 +1138,11 @@ end
 function get_hwreg_size_bytes
     return 128 //HWREG size 128 bytes
 end
+
+function set_status_without_spi_prio(status, tmp)
+    // Write status to HW_REG_STATUS in two pieces that skip SPI_PRIO (bits 2:1, mask 0x6); do not restore STATUS.SPI_PRIO since scheduler may have raised it. tmp is overwritten.
+    s_lshr_b32      tmp, status, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT // tmp = status bits above SPI_PRIO, shifted down to bit 0
+    s_setreg_b32    hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE), tmp // piece 1: STATUS field at offset 3, width 29
+    s_nop           0x2 // avoid S_SETREG => S_SETREG hazard
+    s_setreg_b32    hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE), status // piece 2: STATUS bit 0 only (SCC per the call-site comments)
+end
index 998be96be7361d685e0b83cc36f62c03f1bd9c6a..0bb9c577b3a2c8b7a93e2b9dc330fdff1b32f5ea 100644 (file)
@@ -103,6 +103,10 @@ var SQ_WAVE_STATUS_INST_ATC_MASK   = 0x00800000
 var SQ_WAVE_STATUS_SPI_PRIO_SHIFT  = 1
 var SQ_WAVE_STATUS_SPI_PRIO_MASK   = 0x00000006
 var SQ_WAVE_STATUS_HALT_MASK       = 0x2000
+var SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT   = 0
+var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE    = 1
+var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT  = 3
+var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE   = 29
 
 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT   = 12
 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE    = 9
@@ -317,7 +321,7 @@ L_EXCP_CASE:
     // Restore SQ_WAVE_STATUS.
     s_and_b64       exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
     s_and_b64       vcc, vcc, vcc    // Restore STATUS.VCCZ, not writable by s_setreg_b32
-    s_setreg_b32    hwreg(HW_REG_STATUS), s_save_status
+    set_status_without_spi_prio(s_save_status, ttmp2)
 
     s_rfe_b64       [ttmp0, ttmp1]
 end
@@ -1120,7 +1124,7 @@ end
     s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff     //pc[47:32]        //Do it here in order not to affect STATUS
     s_and_b64   exec, exec, exec  // Restore STATUS.EXECZ, not writable by s_setreg_b32
     s_and_b64   vcc, vcc, vcc  // Restore STATUS.VCCZ, not writable by s_setreg_b32
-    s_setreg_b32    hwreg(HW_REG_STATUS),   s_restore_status    // SCC is included, which is changed by previous salu
+    set_status_without_spi_prio(s_restore_status, s_restore_tmp) // SCC is included, which is changed by previous salu
 
     s_barrier                                                  //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time
 
@@ -1212,3 +1216,11 @@ function ack_sqc_store_workaround
         s_waitcnt lgkmcnt(0)
     end
 end
+
+function set_status_without_spi_prio(status, tmp)
+    // Write status to HW_REG_STATUS in two pieces that skip SPI_PRIO (bits 2:1, mask 0x6); do not restore STATUS.SPI_PRIO since scheduler may have raised it. tmp is overwritten.
+    s_lshr_b32      tmp, status, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT // tmp = status bits above SPI_PRIO, shifted down to bit 0
+    s_setreg_b32    hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE), tmp // piece 1: STATUS field at offset 3, width 29
+    s_nop           0x2 // avoid S_SETREG => S_SETREG hazard
+    s_setreg_b32    hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE), status // piece 2: STATUS bit 0 only (SCC per the call-site comments)
+end