diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 677 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm | 82 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 12 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 53 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 11 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c | 3 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 27 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 26 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c | 76 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c | 4 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 28 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 39 |
12 files changed, 598 insertions, 440 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index 651660958e5b..0320163b6e74 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -3644,7 +3644,7 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = { }; static const uint32_t cwsr_trap_gfx12_hex[] = { - 0xbfa00001, 0xbfa0024b, + 0xbfa00001, 0xbfa002a2, 0xb0804009, 0xb8f8f804, 0x9178ff78, 0x00008c00, 0xb8fbf811, 0x8b6eff78, @@ -3718,7 +3718,15 @@ static const uint32_t cwsr_trap_gfx12_hex[] = { 0x00011677, 0xd7610000, 0x00011a79, 0xd7610000, 0x00011c7e, 0xd7610000, - 0x00011e7f, 0xbefe00ff, + 0x00011e7f, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xbefe00ff, 0x00003fff, 0xbeff0080, 0xee0a407a, 0x000c0000, 0x00004000, 0xd760007a, @@ -3755,38 +3763,46 @@ static const uint32_t cwsr_trap_gfx12_hex[] = { 0x00000200, 0xbef600ff, 0x01000000, 0x7e000280, 0x7e020280, 0x7e040280, - 0xbefd0080, 0xbe804ec2, - 0xbf94fffe, 0xb8faf804, - 0x8b7a847a, 0x91788478, - 0x8c787a78, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xd7610002, 0x0000fa6c, - 0x807d817d, 0x917aff6d, - 0x80000000, 0xd7610002, - 0x0000fa7a, 0x807d817d, - 0xd7610002, 0x0000fa6e, - 0x807d817d, 0xd7610002, - 0x0000fa6f, 0x807d817d, - 0xd7610002, 0x0000fa78, - 0x807d817d, 0xb8faf811, - 0xd7610002, 0x0000fa7a, - 0x807d817d, 0xd7610002, - 0x0000fa7b, 0x807d817d, - 0xb8f1f801, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xb8f1f814, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xb8f1f815, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xb8f1f812, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xb8f1f813, 0xd7610002, - 0x0000fa71, 0x807d817d, + 0xbe804ec2, 0xbf94fffe, + 0xb8faf804, 0x8b7a847a, + 0x91788478, 0x8c787a78, + 0x917aff6d, 0x80000000, + 0xd7610002, 0x00010071, + 0xd7610002, 0x0001026c, + 0xd7610002, 
0x0001047a, + 0xd7610002, 0x0001066e, + 0xd7610002, 0x0001086f, + 0xd7610002, 0x00010a78, + 0xd7610002, 0x00010e7b, + 0xd8500000, 0x00000000, + 0xd8500000, 0x00000000, + 0xd8500000, 0x00000000, + 0xd8500000, 0x00000000, + 0xd8500000, 0x00000000, + 0xd8500000, 0x00000000, + 0xd8500000, 0x00000000, + 0xd8500000, 0x00000000, + 0xb8faf811, 0xd7610002, + 0x00010c7a, 0xb8faf801, + 0xd7610002, 0x0001107a, + 0xb8faf814, 0xd7610002, + 0x0001127a, 0xb8faf815, + 0xd7610002, 0x0001147a, + 0xb8faf812, 0xd7610002, + 0x0001167a, 0xb8faf813, + 0xd7610002, 0x0001187a, 0xb8faf802, 0xd7610002, - 0x0000fa7a, 0x807d817d, - 0xbefa50c1, 0xbfc70000, - 0xd7610002, 0x0000fa7a, - 0x807d817d, 0xbefe00ff, + 0x00011a7a, 0xbefa50c1, + 0xbfc70000, 0xd7610002, + 0x00011c7a, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xbefe00ff, 0x0000ffff, 0xbeff0080, 0xc4068070, 0x008ce802, 0x00000000, 0xbefe00c1, @@ -3801,331 +3817,358 @@ static const uint32_t cwsr_trap_gfx12_hex[] = { 0xbe824102, 0xbe844104, 0xbe864106, 0xbe884108, 0xbe8a410a, 0xbe8c410c, - 0xbe8e410e, 0xd7610002, - 0x0000f200, 0x80798179, - 0xd7610002, 0x0000f201, - 0x80798179, 0xd7610002, - 0x0000f202, 0x80798179, - 0xd7610002, 0x0000f203, - 0x80798179, 0xd7610002, - 0x0000f204, 0x80798179, - 0xd7610002, 0x0000f205, - 0x80798179, 0xd7610002, - 0x0000f206, 0x80798179, - 0xd7610002, 0x0000f207, - 0x80798179, 0xd7610002, - 0x0000f208, 0x80798179, - 0xd7610002, 0x0000f209, - 0x80798179, 0xd7610002, - 0x0000f20a, 0x80798179, - 0xd7610002, 0x0000f20b, - 0x80798179, 0xd7610002, - 0x0000f20c, 0x80798179, - 0xd7610002, 0x0000f20d, - 0x80798179, 0xd7610002, - 0x0000f20e, 0x80798179, - 0xd7610002, 0x0000f20f, - 0x80798179, 0xbf06a079, - 0xbfa10007, 0xc4068070, + 0xbe8e410e, 0xbf068079, + 0xbfa10032, 0xd7610002, + 0x00010000, 0xd7610002, + 0x00010201, 0xd7610002, + 0x00010402, 0xd7610002, + 
0x00010603, 0xd7610002, + 0x00010804, 0xd7610002, + 0x00010a05, 0xd7610002, + 0x00010c06, 0xd7610002, + 0x00010e07, 0xd7610002, + 0x00011008, 0xd7610002, + 0x00011209, 0xd7610002, + 0x0001140a, 0xd7610002, + 0x0001160b, 0xd7610002, + 0x0001180c, 0xd7610002, + 0x00011a0d, 0xd7610002, + 0x00011c0e, 0xd7610002, + 0x00011e0f, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0x80799079, + 0xbfa00038, 0xd7610002, + 0x00012000, 0xd7610002, + 0x00012201, 0xd7610002, + 0x00012402, 0xd7610002, + 0x00012603, 0xd7610002, + 0x00012804, 0xd7610002, + 0x00012a05, 0xd7610002, + 0x00012c06, 0xd7610002, + 0x00012e07, 0xd7610002, + 0x00013008, 0xd7610002, + 0x00013209, 0xd7610002, + 0x0001340a, 0xd7610002, + 0x0001360b, 0xd7610002, + 0x0001380c, 0xd7610002, + 0x00013a0d, 0xd7610002, + 0x00013c0e, 0xd7610002, + 0x00013e0f, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0x80799079, + 0xc4068070, 0x008ce802, + 0x00000000, 0x8070ff70, + 0x00000080, 0xbef90080, + 0x7e040280, 0x807d907d, + 0xbf0aff7d, 0x00000060, + 0xbfa2ff88, 0xbe804100, + 0xbe824102, 0xbe844104, + 0xbe864106, 0xbe884108, + 0xbe8a410a, 0xd7610002, + 0x00010000, 0xd7610002, + 0x00010201, 0xd7610002, + 0x00010402, 0xd7610002, + 0x00010603, 0xd7610002, + 0x00010804, 0xd7610002, + 0x00010a05, 0xd7610002, + 0x00010c06, 0xd7610002, + 0x00010e07, 0xd7610002, + 0x00011008, 0xd7610002, + 0x00011209, 0xd7610002, + 0x0001140a, 0xd7610002, + 0x0001160b, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xc4068070, 0x008ce802, 0x00000000, + 0xbefe00c1, 0x857d9973, + 
0x8b7d817d, 0xbf06817d, + 0xbfa20002, 0xbeff0080, + 0xbfa00001, 0xbeff00c1, + 0xb8fb4306, 0x8b7bc17b, + 0xbfa10044, 0x8b7aff6d, + 0x80000000, 0xbfa10041, + 0x847b897b, 0xbef6007b, + 0xb8f03b05, 0x80708170, + 0xbf0d9973, 0xbfa20002, + 0x84708970, 0xbfa00001, + 0x84708a70, 0xb8fa1e06, + 0x847a8a7a, 0x80707a70, + 0x8070ff70, 0x00000200, 0x8070ff70, 0x00000080, - 0xbef90080, 0x7e040280, - 0x807d907d, 0xbf0aff7d, - 0x00000060, 0xbfa2ffbb, - 0xbe804100, 0xbe824102, - 0xbe844104, 0xbe864106, - 0xbe884108, 0xbe8a410a, - 0xd7610002, 0x0000f200, - 0x80798179, 0xd7610002, - 0x0000f201, 0x80798179, - 0xd7610002, 0x0000f202, - 0x80798179, 0xd7610002, - 0x0000f203, 0x80798179, - 0xd7610002, 0x0000f204, - 0x80798179, 0xd7610002, - 0x0000f205, 0x80798179, - 0xd7610002, 0x0000f206, - 0x80798179, 0xd7610002, - 0x0000f207, 0x80798179, - 0xd7610002, 0x0000f208, - 0x80798179, 0xd7610002, - 0x0000f209, 0x80798179, - 0xd7610002, 0x0000f20a, - 0x80798179, 0xd7610002, - 0x0000f20b, 0x80798179, - 0xc4068070, 0x008ce802, - 0x00000000, 0xbefe00c1, - 0x857d9973, 0x8b7d817d, - 0xbf06817d, 0xbfa20002, - 0xbeff0080, 0xbfa00001, - 0xbeff00c1, 0xb8fb4306, - 0x8b7bc17b, 0xbfa10044, - 0x8b7aff6d, 0x80000000, - 0xbfa10041, 0x847b897b, - 0xbef6007b, 0xb8f03b05, - 0x80708170, 0xbf0d9973, - 0xbfa20002, 0x84708970, - 0xbfa00001, 0x84708a70, - 0xb8fa1e06, 0x847a8a7a, - 0x80707a70, 0x8070ff70, - 0x00000200, 0x8070ff70, - 0x00000080, 0xbef600ff, - 0x01000000, 0xd71f0000, - 0x000100c1, 0xd7200000, - 0x000200c1, 0x16000084, - 0x857d9973, 0x8b7d817d, - 0xbf06817d, 0xbefd0080, - 0xbfa20013, 0xbe8300ff, - 0x00000080, 0xbf800000, - 0xbf800000, 0xbf800000, - 0xd8d80000, 0x01000000, - 0xbf8a0000, 0xc4068070, - 0x008ce801, 0x00000000, - 0x807d037d, 0x80700370, - 0xd5250000, 0x0001ff00, - 0x00000080, 0xbf0a7b7d, - 0xbfa2fff3, 0xbfa00012, - 0xbe8300ff, 0x00000100, + 0xbef600ff, 0x01000000, + 0xd71f0000, 0x000100c1, + 0xd7200000, 0x000200c1, + 0x16000084, 0x857d9973, + 0x8b7d817d, 0xbf06817d, + 0xbefd0080, 0xbfa20013, + 
0xbe8300ff, 0x00000080, 0xbf800000, 0xbf800000, 0xbf800000, 0xd8d80000, 0x01000000, 0xbf8a0000, 0xc4068070, 0x008ce801, 0x00000000, 0x807d037d, 0x80700370, 0xd5250000, - 0x0001ff00, 0x00000100, + 0x0001ff00, 0x00000080, 0xbf0a7b7d, 0xbfa2fff3, - 0xbefe00c1, 0x857d9973, - 0x8b7d817d, 0xbf06817d, - 0xbfa20004, 0xbef000ff, - 0x00000200, 0xbeff0080, - 0xbfa00003, 0xbef000ff, - 0x00000400, 0xbeff00c1, - 0xb8fb3b05, 0x807b817b, - 0x847b827b, 0x857d9973, - 0x8b7d817d, 0xbf06817d, - 0xbfa2001b, 0xbef600ff, - 0x01000000, 0xbefd0084, - 0xbf0a7b7d, 0xbfa10040, - 0x7e008700, 0x7e028701, - 0x7e048702, 0x7e068703, - 0xc4068070, 0x008ce800, - 0x00000000, 0xc4068070, - 0x008ce801, 0x00008000, - 0xc4068070, 0x008ce802, - 0x00010000, 0xc4068070, - 0x008ce803, 0x00018000, - 0x807d847d, 0x8070ff70, - 0x00000200, 0xbf0a7b7d, - 0xbfa2ffeb, 0xbfa0002a, + 0xbfa00012, 0xbe8300ff, + 0x00000100, 0xbf800000, + 0xbf800000, 0xbf800000, + 0xd8d80000, 0x01000000, + 0xbf8a0000, 0xc4068070, + 0x008ce801, 0x00000000, + 0x807d037d, 0x80700370, + 0xd5250000, 0x0001ff00, + 0x00000100, 0xbf0a7b7d, + 0xbfa2fff3, 0xbefe00c1, + 0x857d9973, 0x8b7d817d, + 0xbf06817d, 0xbfa20004, + 0xbef000ff, 0x00000200, + 0xbeff0080, 0xbfa00003, + 0xbef000ff, 0x00000400, + 0xbeff00c1, 0xb8fb3b05, + 0x807b817b, 0x847b827b, + 0x857d9973, 0x8b7d817d, + 0xbf06817d, 0xbfa2001b, 0xbef600ff, 0x01000000, 0xbefd0084, 0xbf0a7b7d, - 0xbfa10015, 0x7e008700, + 0xbfa10040, 0x7e008700, 0x7e028701, 0x7e048702, 0x7e068703, 0xc4068070, 0x008ce800, 0x00000000, 0xc4068070, 0x008ce801, - 0x00010000, 0xc4068070, - 0x008ce802, 0x00020000, + 0x00008000, 0xc4068070, + 0x008ce802, 0x00010000, 0xc4068070, 0x008ce803, - 0x00030000, 0x807d847d, - 0x8070ff70, 0x00000400, + 0x00018000, 0x807d847d, + 0x8070ff70, 0x00000200, 0xbf0a7b7d, 0xbfa2ffeb, - 0xb8fb1e06, 0x8b7bc17b, - 0xbfa1000d, 0x847b837b, - 0x807b7d7b, 0xbefe00c1, - 0xbeff0080, 0x7e008700, + 0xbfa0002a, 0xbef600ff, + 0x01000000, 0xbefd0084, + 0xbf0a7b7d, 0xbfa10015, + 0x7e008700, 0x7e028701, + 
0x7e048702, 0x7e068703, 0xc4068070, 0x008ce800, - 0x00000000, 0x807d817d, - 0x8070ff70, 0x00000080, - 0xbf0a7b7d, 0xbfa2fff7, - 0xbfa0016e, 0xbef4007e, - 0x8b75ff7f, 0x0000ffff, - 0x8c75ff75, 0x00040000, - 0xbef60080, 0xbef700ff, - 0x10807fac, 0xbef1007f, - 0xb8f20742, 0x84729972, - 0x8b6eff7f, 0x04000000, - 0xbfa1003b, 0xbefe00c1, - 0x857d9972, 0x8b7d817d, - 0xbf06817d, 0xbfa20002, - 0xbeff0080, 0xbfa00001, - 0xbeff00c1, 0xb8ef4306, - 0x8b6fc16f, 0xbfa10030, - 0x846f896f, 0xbef6006f, + 0x00000000, 0xc4068070, + 0x008ce801, 0x00010000, + 0xc4068070, 0x008ce802, + 0x00020000, 0xc4068070, + 0x008ce803, 0x00030000, + 0x807d847d, 0x8070ff70, + 0x00000400, 0xbf0a7b7d, + 0xbfa2ffeb, 0xb8fb1e06, + 0x8b7bc17b, 0xbfa1000d, + 0x847b837b, 0x807b7d7b, + 0xbefe00c1, 0xbeff0080, + 0x7e008700, 0xc4068070, + 0x008ce800, 0x00000000, + 0x807d817d, 0x8070ff70, + 0x00000080, 0xbf0a7b7d, + 0xbfa2fff7, 0xbfa0016e, + 0xbef4007e, 0x8b75ff7f, + 0x0000ffff, 0x8c75ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x10807fac, + 0xbef1007f, 0xb8f20742, + 0x84729972, 0x8b6eff7f, + 0x04000000, 0xbfa1003b, + 0xbefe00c1, 0x857d9972, + 0x8b7d817d, 0xbf06817d, + 0xbfa20002, 0xbeff0080, + 0xbfa00001, 0xbeff00c1, + 0xb8ef4306, 0x8b6fc16f, + 0xbfa10030, 0x846f896f, + 0xbef6006f, 0xb8f83b05, + 0x80788178, 0xbf0d9972, + 0xbfa20002, 0x84788978, + 0xbfa00001, 0x84788a78, + 0xb8ee1e06, 0x846e8a6e, + 0x80786e78, 0x8078ff78, + 0x00000200, 0x8078ff78, + 0x00000080, 0xbef600ff, + 0x01000000, 0x857d9972, + 0x8b7d817d, 0xbf06817d, + 0xbefd0080, 0xbfa2000d, + 0xc4050078, 0x0080e800, + 0x00000000, 0xbf8a0000, + 0xdac00000, 0x00000000, + 0x807dff7d, 0x00000080, + 0x8078ff78, 0x00000080, + 0xbf0a6f7d, 0xbfa2fff4, + 0xbfa0000c, 0xc4050078, + 0x0080e800, 0x00000000, + 0xbf8a0000, 0xdac00000, + 0x00000000, 0x807dff7d, + 0x00000100, 0x8078ff78, + 0x00000100, 0xbf0a6f7d, + 0xbfa2fff4, 0xbef80080, + 0xbefe00c1, 0x857d9972, + 0x8b7d817d, 0xbf06817d, + 0xbfa20002, 0xbeff0080, + 0xbfa00001, 0xbeff00c1, + 0xb8ef3b05, 0x806f816f, + 
0x846f826f, 0x857d9972, + 0x8b7d817d, 0xbf06817d, + 0xbfa2002c, 0xbef600ff, + 0x01000000, 0xbeee0078, + 0x8078ff78, 0x00000200, + 0xbefd0084, 0xbf0a6f7d, + 0xbfa10061, 0xc4050078, + 0x008ce800, 0x00000000, + 0xc4050078, 0x008ce801, + 0x00008000, 0xc4050078, + 0x008ce802, 0x00010000, + 0xc4050078, 0x008ce803, + 0x00018000, 0xbf8a0000, + 0x7e008500, 0x7e028501, + 0x7e048502, 0x7e068503, + 0x807d847d, 0x8078ff78, + 0x00000200, 0xbf0a6f7d, + 0xbfa2ffea, 0xc405006e, + 0x008ce800, 0x00000000, + 0xc405006e, 0x008ce801, + 0x00008000, 0xc405006e, + 0x008ce802, 0x00010000, + 0xc405006e, 0x008ce803, + 0x00018000, 0xbf8a0000, + 0xbfa0003d, 0xbef600ff, + 0x01000000, 0xbeee0078, + 0x8078ff78, 0x00000400, + 0xbefd0084, 0xbf0a6f7d, + 0xbfa10016, 0xc4050078, + 0x008ce800, 0x00000000, + 0xc4050078, 0x008ce801, + 0x00010000, 0xc4050078, + 0x008ce802, 0x00020000, + 0xc4050078, 0x008ce803, + 0x00030000, 0xbf8a0000, + 0x7e008500, 0x7e028501, + 0x7e048502, 0x7e068503, + 0x807d847d, 0x8078ff78, + 0x00000400, 0xbf0a6f7d, + 0xbfa2ffea, 0xb8ef1e06, + 0x8b6fc16f, 0xbfa1000f, + 0x846f836f, 0x806f7d6f, + 0xbefe00c1, 0xbeff0080, + 0xc4050078, 0x008ce800, + 0x00000000, 0xbf8a0000, + 0x7e008500, 0x807d817d, + 0x8078ff78, 0x00000080, + 0xbf0a6f7d, 0xbfa2fff6, + 0xbeff00c1, 0xc405006e, + 0x008ce800, 0x00000000, + 0xc405006e, 0x008ce801, + 0x00010000, 0xc405006e, + 0x008ce802, 0x00020000, + 0xc405006e, 0x008ce803, + 0x00030000, 0xbf8a0000, 0xb8f83b05, 0x80788178, 0xbf0d9972, 0xbfa20002, 0x84788978, 0xbfa00001, 0x84788a78, 0xb8ee1e06, 0x846e8a6e, 0x80786e78, 0x8078ff78, 0x00000200, - 0x8078ff78, 0x00000080, - 0xbef600ff, 0x01000000, - 0x857d9972, 0x8b7d817d, - 0xbf06817d, 0xbefd0080, - 0xbfa2000d, 0xc4050078, - 0x0080e800, 0x00000000, - 0xbf8a0000, 0xdac00000, - 0x00000000, 0x807dff7d, - 0x00000080, 0x8078ff78, - 0x00000080, 0xbf0a6f7d, - 0xbfa2fff4, 0xbfa0000c, - 0xc4050078, 0x0080e800, - 0x00000000, 0xbf8a0000, - 0xdac00000, 0x00000000, - 0x807dff7d, 0x00000100, - 0x8078ff78, 0x00000100, - 
0xbf0a6f7d, 0xbfa2fff4, - 0xbef80080, 0xbefe00c1, - 0x857d9972, 0x8b7d817d, - 0xbf06817d, 0xbfa20002, - 0xbeff0080, 0xbfa00001, - 0xbeff00c1, 0xb8ef3b05, - 0x806f816f, 0x846f826f, - 0x857d9972, 0x8b7d817d, - 0xbf06817d, 0xbfa2002c, + 0x80f8ff78, 0x00000050, 0xbef600ff, 0x01000000, - 0xbeee0078, 0x8078ff78, - 0x00000200, 0xbefd0084, - 0xbf0a6f7d, 0xbfa10061, - 0xc4050078, 0x008ce800, - 0x00000000, 0xc4050078, - 0x008ce801, 0x00008000, - 0xc4050078, 0x008ce802, - 0x00010000, 0xc4050078, - 0x008ce803, 0x00018000, - 0xbf8a0000, 0x7e008500, - 0x7e028501, 0x7e048502, - 0x7e068503, 0x807d847d, + 0xbefd00ff, 0x0000006c, + 0x80f89078, 0xf462403a, + 0xf0000000, 0xbf8a0000, + 0x80fd847d, 0xbf800000, + 0xbe804300, 0xbe824302, + 0x80f8a078, 0xf462603a, + 0xf0000000, 0xbf8a0000, + 0x80fd887d, 0xbf800000, + 0xbe804300, 0xbe824302, + 0xbe844304, 0xbe864306, + 0x80f8c078, 0xf462803a, + 0xf0000000, 0xbf8a0000, + 0x80fd907d, 0xbf800000, + 0xbe804300, 0xbe824302, + 0xbe844304, 0xbe864306, + 0xbe884308, 0xbe8a430a, + 0xbe8c430c, 0xbe8e430e, + 0xbf06807d, 0xbfa1fff0, + 0xb980f801, 0x00000000, + 0xb8f83b05, 0x80788178, + 0xbf0d9972, 0xbfa20002, + 0x84788978, 0xbfa00001, + 0x84788a78, 0xb8ee1e06, + 0x846e8a6e, 0x80786e78, 0x8078ff78, 0x00000200, - 0xbf0a6f7d, 0xbfa2ffea, - 0xc405006e, 0x008ce800, - 0x00000000, 0xc405006e, - 0x008ce801, 0x00008000, - 0xc405006e, 0x008ce802, - 0x00010000, 0xc405006e, - 0x008ce803, 0x00018000, - 0xbf8a0000, 0xbfa0003d, 0xbef600ff, 0x01000000, - 0xbeee0078, 0x8078ff78, - 0x00000400, 0xbefd0084, - 0xbf0a6f7d, 0xbfa10016, - 0xc4050078, 0x008ce800, - 0x00000000, 0xc4050078, - 0x008ce801, 0x00010000, - 0xc4050078, 0x008ce802, - 0x00020000, 0xc4050078, - 0x008ce803, 0x00030000, - 0xbf8a0000, 0x7e008500, - 0x7e028501, 0x7e048502, - 0x7e068503, 0x807d847d, - 0x8078ff78, 0x00000400, - 0xbf0a6f7d, 0xbfa2ffea, - 0xb8ef1e06, 0x8b6fc16f, - 0xbfa1000f, 0x846f836f, - 0x806f7d6f, 0xbefe00c1, - 0xbeff0080, 0xc4050078, - 0x008ce800, 0x00000000, - 0xbf8a0000, 0x7e008500, - 
0x807d817d, 0x8078ff78, - 0x00000080, 0xbf0a6f7d, - 0xbfa2fff6, 0xbeff00c1, - 0xc405006e, 0x008ce800, - 0x00000000, 0xc405006e, - 0x008ce801, 0x00010000, - 0xc405006e, 0x008ce802, - 0x00020000, 0xc405006e, - 0x008ce803, 0x00030000, - 0xbf8a0000, 0xb8f83b05, - 0x80788178, 0xbf0d9972, - 0xbfa20002, 0x84788978, - 0xbfa00001, 0x84788a78, - 0xb8ee1e06, 0x846e8a6e, - 0x80786e78, 0x8078ff78, - 0x00000200, 0x80f8ff78, - 0x00000050, 0xbef600ff, - 0x01000000, 0xbefd00ff, - 0x0000006c, 0x80f89078, - 0xf462403a, 0xf0000000, - 0xbf8a0000, 0x80fd847d, - 0xbf800000, 0xbe804300, - 0xbe824302, 0x80f8a078, - 0xf462603a, 0xf0000000, - 0xbf8a0000, 0x80fd887d, - 0xbf800000, 0xbe804300, - 0xbe824302, 0xbe844304, - 0xbe864306, 0x80f8c078, - 0xf462803a, 0xf0000000, - 0xbf8a0000, 0x80fd907d, - 0xbf800000, 0xbe804300, - 0xbe824302, 0xbe844304, - 0xbe864306, 0xbe884308, - 0xbe8a430a, 0xbe8c430c, - 0xbe8e430e, 0xbf06807d, - 0xbfa1fff0, 0xb980f801, - 0x00000000, 0xb8f83b05, - 0x80788178, 0xbf0d9972, - 0xbfa20002, 0x84788978, - 0xbfa00001, 0x84788a78, - 0xb8ee1e06, 0x846e8a6e, - 0x80786e78, 0x8078ff78, - 0x00000200, 0xbef600ff, - 0x01000000, 0xbeff0071, - 0xf4621bfa, 0xf0000000, - 0x80788478, 0xf4621b3a, + 0xbeff0071, 0xf4621bfa, 0xf0000000, 0x80788478, - 0xf4621b7a, 0xf0000000, - 0x80788478, 0xf4621c3a, + 0xf4621b3a, 0xf0000000, + 0x80788478, 0xf4621b7a, 0xf0000000, 0x80788478, - 0xf4621c7a, 0xf0000000, - 0x80788478, 0xf4621eba, + 0xf4621c3a, 0xf0000000, + 0x80788478, 0xf4621c7a, 0xf0000000, 0x80788478, - 0xf4621efa, 0xf0000000, - 0x80788478, 0xf4621e7a, + 0xf4621eba, 0xf0000000, + 0x80788478, 0xf4621efa, 0xf0000000, 0x80788478, - 0xf4621cfa, 0xf0000000, - 0x80788478, 0xf4621bba, + 0xf4621e7a, 0xf0000000, + 0x80788478, 0xf4621cfa, 0xf0000000, 0x80788478, - 0xbf8a0000, 0xb96ef814, 0xf4621bba, 0xf0000000, 0x80788478, 0xbf8a0000, - 0xb96ef815, 0xf4621bba, + 0xb96ef814, 0xf4621bba, 0xf0000000, 0x80788478, - 0xbf8a0000, 0xb96ef812, + 0xbf8a0000, 0xb96ef815, 0xf4621bba, 0xf0000000, 0x80788478, 
0xbf8a0000, - 0xb96ef813, 0x8b6eff7f, - 0x04000000, 0xbfa1000d, - 0x80788478, 0xf4621bba, + 0xb96ef812, 0xf4621bba, 0xf0000000, 0x80788478, - 0xbf8a0000, 0xbf0d806e, - 0xbfa10006, 0x856e906e, - 0x8b6e6e6e, 0xbfa10003, - 0xbe804ec1, 0x816ec16e, - 0xbfa0fffb, 0xbefd006f, - 0xbefe0070, 0xbeff0071, - 0xb97b2011, 0x857b867b, - 0xb97b0191, 0x857b827b, - 0xb97bba11, 0xb973f801, - 0xb8ee3b05, 0x806e816e, - 0xbf0d9972, 0xbfa20002, - 0x846e896e, 0xbfa00001, - 0x846e8a6e, 0xb8ef1e06, - 0x846f8a6f, 0x806e6f6e, - 0x806eff6e, 0x00000200, - 0x806e746e, 0x826f8075, - 0x8b6fff6f, 0x0000ffff, - 0xf4605c37, 0xf8000050, - 0xf4605d37, 0xf8000060, - 0xf4601e77, 0xf8000074, - 0xbf8a0000, 0x8b6dff6d, - 0x0000ffff, 0x8bfe7e7e, - 0x8bea6a6a, 0xb97af804, + 0xbf8a0000, 0xb96ef813, + 0x8b6eff7f, 0x04000000, + 0xbfa1000d, 0x80788478, + 0xf4621bba, 0xf0000000, + 0x80788478, 0xbf8a0000, + 0xbf0d806e, 0xbfa10006, + 0x856e906e, 0x8b6e6e6e, + 0xbfa10003, 0xbe804ec1, + 0x816ec16e, 0xbfa0fffb, + 0xbefd006f, 0xbefe0070, + 0xbeff0071, 0xb97b2011, + 0x857b867b, 0xb97b0191, + 0x857b827b, 0xb97bba11, + 0xb973f801, 0xb8ee3b05, + 0x806e816e, 0xbf0d9972, + 0xbfa20002, 0x846e896e, + 0xbfa00001, 0x846e8a6e, + 0xb8ef1e06, 0x846f8a6f, + 0x806e6f6e, 0x806eff6e, + 0x00000200, 0x806e746e, + 0x826f8075, 0x8b6fff6f, + 0x0000ffff, 0xf4605c37, + 0xf8000050, 0xf4605d37, + 0xf8000060, 0xf4601e77, + 0xf8000074, 0xbf8a0000, + 0x8b6dff6d, 0x0000ffff, + 0x8bfe7e7e, 0x8bea6a6a, + 0xb97af804, 0xbe804ec2, + 0xbf94fffe, 0xbe804a6c, 0xbe804ec2, 0xbf94fffe, - 0xbe804a6c, 0xbe804ec2, - 0xbf94fffe, 0xbfb10000, + 0xbfb10000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, - 0xbf9f0000, 0x00000000, }; static const uint32_t cwsr_trap_gfx9_5_0_hex[] = { diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm index 7b9d36e5fa43..5a1a1b1f897f 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm +++ 
b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm @@ -30,6 +30,7 @@ #define CHIP_GFX12 37 #define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost TRAP_AFTER_INST exception when SAVECTX raised +#define HAVE_VALU_SGPR_HAZARD (ASIC_FAMILY == CHIP_GFX12) var SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK = 0x4 var SQ_WAVE_STATE_PRIV_SCC_SHIFT = 9 @@ -351,6 +352,7 @@ L_HAVE_VGPRS: v_writelane_b32 v0, ttmp13, 0xD v_writelane_b32 v0, exec_lo, 0xE v_writelane_b32 v0, exec_hi, 0xF + valu_sgpr_hazard() s_mov_b32 exec_lo, 0x3FFF s_mov_b32 exec_hi, 0x0 @@ -417,7 +419,6 @@ L_SAVE_HWREG: v_mov_b32 v0, 0x0 //Offset[31:0] from buffer resource v_mov_b32 v1, 0x0 //Offset[63:32] from buffer resource v_mov_b32 v2, 0x0 //Set of SGPRs for TCP store - s_mov_b32 m0, 0x0 //Next lane of v2 to write to // Ensure no further changes to barrier or LDS state. // STATE_PRIV.BARRIER_COMPLETE may change up to this point. @@ -430,40 +431,41 @@ L_SAVE_HWREG: s_andn2_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK s_or_b32 s_save_state_priv, s_save_state_priv, s_save_tmp - write_hwreg_to_v2(s_save_m0) - write_hwreg_to_v2(s_save_pc_lo) s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK - write_hwreg_to_v2(s_save_tmp) - write_hwreg_to_v2(s_save_exec_lo) - write_hwreg_to_v2(s_save_exec_hi) - write_hwreg_to_v2(s_save_state_priv) + v_writelane_b32 v2, s_save_m0, 0x0 + v_writelane_b32 v2, s_save_pc_lo, 0x1 + v_writelane_b32 v2, s_save_tmp, 0x2 + v_writelane_b32 v2, s_save_exec_lo, 0x3 + v_writelane_b32 v2, s_save_exec_hi, 0x4 + v_writelane_b32 v2, s_save_state_priv, 0x5 + v_writelane_b32 v2, s_save_xnack_mask, 0x7 + valu_sgpr_hazard() s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) - write_hwreg_to_v2(s_save_tmp) + v_writelane_b32 v2, s_save_tmp, 0x6 - write_hwreg_to_v2(s_save_xnack_mask) + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_MODE) + v_writelane_b32 v2, s_save_tmp, 0x8 - s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_MODE) - 
write_hwreg_to_v2(s_save_m0) + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_SCRATCH_BASE_LO) + v_writelane_b32 v2, s_save_tmp, 0x9 - s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_SCRATCH_BASE_LO) - write_hwreg_to_v2(s_save_m0) + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_SCRATCH_BASE_HI) + v_writelane_b32 v2, s_save_tmp, 0xA - s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_SCRATCH_BASE_HI) - write_hwreg_to_v2(s_save_m0) + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_EXCP_FLAG_USER) + v_writelane_b32 v2, s_save_tmp, 0xB - s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_EXCP_FLAG_USER) - write_hwreg_to_v2(s_save_m0) - - s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_TRAP_CTRL) - write_hwreg_to_v2(s_save_m0) + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_TRAP_CTRL) + v_writelane_b32 v2, s_save_tmp, 0xC s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS) - write_hwreg_to_v2(s_save_tmp) + v_writelane_b32 v2, s_save_tmp, 0xD s_get_barrier_state s_save_tmp, -1 s_wait_kmcnt (0) - write_hwreg_to_v2(s_save_tmp) + v_writelane_b32 v2, s_save_tmp, 0xE + valu_sgpr_hazard() // Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this. s_mov_b32 exec_lo, 0xFFFF @@ -497,10 +499,12 @@ L_SAVE_SGPR_LOOP: s_movrels_b64 s12, s12 //s12 = s[12+m0], s13 = s[13+m0] s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0] - write_16sgpr_to_v2(s0) - - s_cmp_eq_u32 ttmp13, 0x20 //have 32 VGPR lanes filled? - s_cbranch_scc0 L_SAVE_SGPR_SKIP_TCP_STORE + s_cmp_eq_u32 ttmp13, 0x0 + s_cbranch_scc0 L_WRITE_V2_SECOND_HALF + write_16sgpr_to_v2(s0, 0x0) + s_branch L_SAVE_SGPR_SKIP_TCP_STORE +L_WRITE_V2_SECOND_HALF: + write_16sgpr_to_v2(s0, 0x10) buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS s_add_u32 s_save_mem_offset, s_save_mem_offset, 0x80 @@ -1056,27 +1060,21 @@ L_END_PGM: s_endpgm_saved end -function write_hwreg_to_v2(s) - // Copy into VGPR for later TCP store. 
- v_writelane_b32 v2, s, m0 - s_add_u32 m0, m0, 0x1 -end - - -function write_16sgpr_to_v2(s) +function write_16sgpr_to_v2(s, lane_offset) // Copy into VGPR for later TCP store. for var sgpr_idx = 0; sgpr_idx < 16; sgpr_idx ++ - v_writelane_b32 v2, s[sgpr_idx], ttmp13 - s_add_u32 ttmp13, ttmp13, 0x1 + v_writelane_b32 v2, s[sgpr_idx], sgpr_idx + lane_offset end + valu_sgpr_hazard() + s_add_u32 ttmp13, ttmp13, 0x10 end function write_12sgpr_to_v2(s) // Copy into VGPR for later TCP store. for var sgpr_idx = 0; sgpr_idx < 12; sgpr_idx ++ - v_writelane_b32 v2, s[sgpr_idx], ttmp13 - s_add_u32 ttmp13, ttmp13, 0x1 + v_writelane_b32 v2, s[sgpr_idx], sgpr_idx end + valu_sgpr_hazard() end function read_hwreg_from_mem(s, s_rsrc, s_mem_offset) @@ -1128,3 +1126,11 @@ function get_wave_size2(s_reg) s_getreg_b32 s_reg, hwreg(HW_REG_WAVE_STATUS,SQ_WAVE_STATUS_WAVE64_SHIFT,SQ_WAVE_STATUS_WAVE64_SIZE) s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE end + +function valu_sgpr_hazard +#if HAVE_VALU_SGPR_HAZARD + for var rep = 0; rep < 8; rep ++ + ds_nop + end +#endif +end diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c index 12456c61ffa5..ba99e0f258ae 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c @@ -357,12 +357,12 @@ int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en) return 0; if (!pdd->proc_ctx_cpu_ptr) { - r = amdgpu_amdkfd_alloc_gtt_mem(adev, - AMDGPU_MES_PROC_CTX_SIZE, - &pdd->proc_ctx_bo, - &pdd->proc_ctx_gpu_addr, - &pdd->proc_ctx_cpu_ptr, - false); + r = amdgpu_amdkfd_alloc_gtt_mem(adev, + AMDGPU_MES_PROC_CTX_SIZE, + &pdd->proc_ctx_bo, + &pdd->proc_ctx_gpu_addr, + &pdd->proc_ctx_cpu_ptr, + false); if (r) { dev_err(adev->dev, "failed to allocate process context bo\n"); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 2afcc1b4856a..2ed003d3ff0e 100644 --- 
a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -43,6 +43,8 @@ /* Size of the per-pipe EOP queue */ #define CIK_HPD_EOP_BYTES_LOG2 11 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2) +/* See unmap_queues_cpsch() */ +#define USE_DEFAULT_GRACE_PERIOD 0xffffffff static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid, unsigned int vmid); @@ -1219,11 +1221,13 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, decrement_queue_count(dqm, qpd, q); if (dqm->dev->kfd->shared_resources.enable_mes) { - retval = remove_queue_mes(dqm, q, qpd); - if (retval) { + int err; + + err = remove_queue_mes(dqm, q, qpd); + if (err) { dev_err(dev, "Failed to evict queue %d\n", q->properties.queue_id); - goto out; + retval = err; } } } @@ -1746,10 +1750,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm) init_sdma_bitmaps(dqm); - if (dqm->dev->kfd2kgd->get_iq_wait_times) - dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev, - &dqm->wait_times, - ffs(dqm->dev->xcc_mask) - 1); + update_dqm_wait_times(dqm); return 0; } @@ -1845,25 +1846,11 @@ static int start_cpsch(struct device_queue_manager *dqm) /* clear hang status when driver try to start the hw scheduler */ dqm->sched_running = true; - if (!dqm->dev->kfd->shared_resources.enable_mes) + if (!dqm->dev->kfd->shared_resources.enable_mes) { + if (pm_config_dequeue_wait_counts(&dqm->packet_mgr, + KFD_DEQUEUE_WAIT_INIT, 0 /* unused */)) + dev_err(dev, "Setting optimized dequeue wait failed. 
Using default values\n"); execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); - - /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */ - if (amdgpu_emu_mode == 0 && dqm->dev->adev->gmc.is_app_apu && - (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))) { - uint32_t reg_offset = 0; - uint32_t grace_period = 1; - - retval = pm_update_grace_period(&dqm->packet_mgr, - grace_period); - if (retval) - dev_err(dev, "Setting grace timeout failed\n"); - else if (dqm->dev->kfd2kgd->build_grace_period_packet_info) - /* Update dqm->wait_times maintained in software */ - dqm->dev->kfd2kgd->build_grace_period_packet_info( - dqm->dev->adev, dqm->wait_times, - grace_period, &reg_offset, - &dqm->wait_times); } /* setup per-queue reset detection buffer */ @@ -2359,7 +2346,14 @@ static int reset_queues_on_hws_hang(struct device_queue_manager *dqm, bool is_sd return is_sdma ? reset_hung_queues_sdma(dqm) : reset_hung_queues(dqm); } -/* dqm->lock mutex has to be locked before calling this function */ +/* dqm->lock mutex has to be locked before calling this function + * + * @grace_period: If USE_DEFAULT_GRACE_PERIOD then default wait time + * for context switch latency. Lower values are used by debugger + * since context switches are triggered at high frequency.
+ * This is configured by setting CP_IQ_WAIT_TIME2.SCH_WAVE + * + */ static int unmap_queues_cpsch(struct device_queue_manager *dqm, enum kfd_unmap_queues_filter filter, uint32_t filter_param, @@ -2378,7 +2372,8 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, return -EIO; if (grace_period != USE_DEFAULT_GRACE_PERIOD) { - retval = pm_update_grace_period(&dqm->packet_mgr, grace_period); + retval = pm_config_dequeue_wait_counts(&dqm->packet_mgr, + KFD_DEQUEUE_WAIT_SET_SCH_WAVE, grace_period); if (retval) goto out; } @@ -2419,8 +2414,8 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, /* We need to reset the grace period value for this device */ if (grace_period != USE_DEFAULT_GRACE_PERIOD) { - if (pm_update_grace_period(&dqm->packet_mgr, - USE_DEFAULT_GRACE_PERIOD)) + if (pm_config_dequeue_wait_counts(&dqm->packet_mgr, + KFD_DEQUEUE_WAIT_RESET, 0 /* unused */)) dev_err(dev, "Failed to reset grace period\n"); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 122eb745e9c4..74a61b5b2f0b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -37,7 +37,6 @@ #define KFD_MES_PROCESS_QUANTUM 100000 #define KFD_MES_GANG_QUANTUM 10000 -#define USE_DEFAULT_GRACE_PERIOD 0xffffffff struct device_process_node { struct qcm_process_device *qpd; @@ -360,4 +359,14 @@ static inline int read_sdma_queue_counter(uint64_t __user *q_rptr, uint64_t *val /* SDMA activity counter is stored at queue's RPTR + 0x8 location. 
*/ return get_user(*val, q_rptr + 1); } + +static inline void update_dqm_wait_times(struct device_queue_manager *dqm) +{ + if (dqm->dev->kfd2kgd->get_iq_wait_times) + dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev, + &dqm->wait_times, + ffs(dqm->dev->xcc_mask) - 1); +} + + #endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c index d794c8172b40..9fcc8c6e57b7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c @@ -71,8 +71,7 @@ static bool set_cache_memory_policy_v9(struct device_queue_manager *dqm, qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT; if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3) || - KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4) || - KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 5, 0)) + KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4)) qpd->sh_mem_config |= (1 << SH_MEM_CONFIG__F8_MODE__SHIFT); if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 5, 0)) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 3014925d95ff..80320a6c8854 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -554,7 +554,7 @@ static void init_mqd_hiq_v9_4_3(struct mqd_manager *mm, void **mqd, m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK | 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT | 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT; - if (amdgpu_sriov_vf(mm->dev->adev)) + if (amdgpu_sriov_multi_vf_mode(mm->dev->adev)) m->cp_hqd_pq_doorbell_control |= 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT; m->cp_mqd_stride_size = kfd_hiq_mqd_stride(mm->dev); @@ -667,7 +667,9 @@ static void init_mqd_v9_4_3(struct mqd_manager *mm, void **mqd, get_xcc_mqd(mqd_mem_obj, &xcc_mqd_mem_obj, offset*xcc); init_mqd(mm, 
(void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q); - + if (amdgpu_sriov_multi_vf_mode(mm->dev->adev)) + m->cp_hqd_pq_doorbell_control |= 1 << + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT; m->cp_mqd_stride_size = offset; /* @@ -727,6 +729,9 @@ static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd, m = get_mqd(mqd + size * xcc); update_mqd(mm, m, q, minfo); + if (amdgpu_sriov_multi_vf_mode(mm->dev->adev)) + m->cp_hqd_pq_doorbell_control |= 1 << + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT; update_cu_mask(mm, m, minfo, xcc); if (q->format == KFD_QUEUE_FORMAT_AQL) { @@ -749,6 +754,21 @@ static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd, } } +static void restore_mqd_v9_4_3(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *qp, + const void *mqd_src, + const void *ctl_stack_src, u32 ctl_stack_size) +{ + restore_mqd(mm, mqd, mqd_mem_obj, gart_addr, qp, mqd_src, ctl_stack_src, ctl_stack_size); + if (amdgpu_sriov_multi_vf_mode(mm->dev->adev)) { + struct v9_mqd *m; + + m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr; + m->cp_hqd_pq_doorbell_control |= 1 << + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT; + } +} static int destroy_mqd_v9_4_3(struct mqd_manager *mm, void *mqd, enum kfd_preempt_type type, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id) @@ -883,7 +903,6 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->is_occupied = kfd_is_occupied_cp; mqd->get_checkpoint_info = get_checkpoint_info; mqd->checkpoint_mqd = checkpoint_mqd; - mqd->restore_mqd = restore_mqd; mqd->mqd_size = sizeof(struct v9_mqd); mqd->mqd_stride = mqd_stride_v9; #if defined(CONFIG_DEBUG_FS) @@ -895,12 +914,14 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->init_mqd = init_mqd_v9_4_3; mqd->load_mqd = load_mqd_v9_4_3; mqd->update_mqd = update_mqd_v9_4_3; + mqd->restore_mqd = restore_mqd_v9_4_3; mqd->destroy_mqd = destroy_mqd_v9_4_3; 
mqd->get_wave_state = get_wave_state_v9_4_3; } else { mqd->init_mqd = init_mqd; mqd->load_mqd = load_mqd; mqd->update_mqd = update_mqd; + mqd->restore_mqd = restore_mqd; mqd->destroy_mqd = kfd_destroy_mqd_cp; mqd->get_wave_state = get_wave_state; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 4984b41cd372..3f574d82b5fc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -396,14 +396,29 @@ out: return retval; } -int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period) +/* pm_config_dequeue_wait_counts: Configure dequeue timer Wait Counts + * by writing to CP_IQ_WAIT_TIME2 registers. + * + * @cmd: See enum kfd_config_dequeue_wait_counts_cmd definition + * @value: Depends on the cmd. This parameter is unused for + * KFD_DEQUEUE_WAIT_INIT and KFD_DEQUEUE_WAIT_RESET. For + * KFD_DEQUEUE_WAIT_SET_SCH_WAVE it holds value to be set + * + */ +int pm_config_dequeue_wait_counts(struct packet_manager *pm, + enum kfd_config_dequeue_wait_counts_cmd cmd, + uint32_t value) { struct kfd_node *node = pm->dqm->dev; struct device *dev = node->adev->dev; int retval = 0; uint32_t *buffer, size; - size = pm->pmf->set_grace_period_size; + if (!pm->pmf->config_dequeue_wait_counts || + !pm->pmf->config_dequeue_wait_counts_size) + return 0; + + size = pm->pmf->config_dequeue_wait_counts_size; mutex_lock(&pm->lock); @@ -419,13 +434,18 @@ int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period) goto out; } - retval = pm->pmf->set_grace_period(pm, buffer, grace_period); + retval = pm->pmf->config_dequeue_wait_counts(pm, buffer, + cmd, value); if (!retval) retval = kq_submit_packet(pm->priv_queue); else kq_rollback_packet(pm->priv_queue); } + /* If default value is modified, cache that value in dqm->wait_times */ + if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT) + update_dqm_wait_times(pm->dqm); + out: 
mutex_unlock(&pm->lock); return retval; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index d56525201155..d440df602393 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -297,23 +297,71 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, return 0; } -static int pm_set_grace_period_v9(struct packet_manager *pm, +static inline void pm_build_dequeue_wait_counts_packet_info(struct packet_manager *pm, + uint32_t sch_value, uint32_t que_sleep, uint32_t *reg_offset, + uint32_t *reg_data) +{ + pm->dqm->dev->kfd2kgd->build_dequeue_wait_counts_packet_info( + pm->dqm->dev->adev, + pm->dqm->wait_times, + sch_value, + que_sleep, + reg_offset, + reg_data); +} + +static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm, uint32_t *buffer, - uint32_t grace_period) + enum kfd_config_dequeue_wait_counts_cmd cmd, + uint32_t value) { struct pm4_mec_write_data_mmio *packet; uint32_t reg_offset = 0; uint32_t reg_data = 0; - pm->dqm->dev->kfd2kgd->build_grace_period_packet_info( - pm->dqm->dev->adev, - pm->dqm->wait_times, - grace_period, - &reg_offset, - &reg_data); + switch (cmd) { + case KFD_DEQUEUE_WAIT_INIT: { + uint32_t sch_wave = 0, que_sleep = 0; + /* Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default 0x40. + * On a 1GHz machine this is roughly 1 microsecond, which is + * about how long it takes to load data out of memory during + * queue connect + * QUE_SLEEP: Wait Count for Dequeue Retry. 
+ */ + if (KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(9, 4, 1) && + KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(10, 0, 0)) { + que_sleep = 1; + + /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */ + if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu && + (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3))) + sch_wave = 1; + } else { + return 0; + } + pm_build_dequeue_wait_counts_packet_info(pm, sch_wave, que_sleep, + &reg_offset, &reg_data); + + break; + } + case KFD_DEQUEUE_WAIT_RESET: + /* reg_data would be set to dqm->wait_times */ + pm_build_dequeue_wait_counts_packet_info(pm, 0, 0, &reg_offset, &reg_data); + break; - if (grace_period == USE_DEFAULT_GRACE_PERIOD) - reg_data = pm->dqm->wait_times; + case KFD_DEQUEUE_WAIT_SET_SCH_WAVE: + /* The CP cannot handle value 0 and it will result in + * an infinite grace period being set so set to 1 to prevent this. Also + * avoid debugger API breakage as it sets 0 and expects a low value. + */ + if (!value) + value = 1; + pm_build_dequeue_wait_counts_packet_info(pm, value, 0, &reg_offset, &reg_data); + break; + default: + pr_err("Invalid dequeue wait cmd\n"); + return -EINVAL; + } packet = (struct pm4_mec_write_data_mmio *)buffer; memset(buffer, 0, sizeof(struct pm4_mec_write_data_mmio)); @@ -415,7 +463,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = { .set_resources = pm_set_resources_v9, .map_queues = pm_map_queues_v9, .unmap_queues = pm_unmap_queues_v9, - .set_grace_period = pm_set_grace_period_v9, + .config_dequeue_wait_counts = pm_config_dequeue_wait_counts_v9, .query_status = pm_query_status_v9, .release_mem = NULL, .map_process_size = sizeof(struct pm4_mes_map_process), @@ -423,7 +471,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = { .set_resources_size = sizeof(struct pm4_mes_set_resources), .map_queues_size = sizeof(struct pm4_mes_map_queues), .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), - .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio), + 
.config_dequeue_wait_counts_size = sizeof(struct pm4_mec_write_data_mmio), .query_status_size = sizeof(struct pm4_mes_query_status), .release_mem_size = 0, }; @@ -434,7 +482,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = { .set_resources = pm_set_resources_v9, .map_queues = pm_map_queues_v9, .unmap_queues = pm_unmap_queues_v9, - .set_grace_period = pm_set_grace_period_v9, + .config_dequeue_wait_counts = pm_config_dequeue_wait_counts_v9, .query_status = pm_query_status_v9, .release_mem = NULL, .map_process_size = sizeof(struct pm4_mes_map_process_aldebaran), @@ -442,7 +490,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = { .set_resources_size = sizeof(struct pm4_mes_set_resources), .map_queues_size = sizeof(struct pm4_mes_map_queues), .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), - .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio), + .config_dequeue_wait_counts_size = sizeof(struct pm4_mec_write_data_mmio), .query_status_size = sizeof(struct pm4_mes_query_status), .release_mem_size = 0, }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c index 347c86e1c378..a1de5d7e173a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c @@ -304,7 +304,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = { .set_resources = pm_set_resources_vi, .map_queues = pm_map_queues_vi, .unmap_queues = pm_unmap_queues_vi, - .set_grace_period = NULL, + .config_dequeue_wait_counts = NULL, .query_status = pm_query_status_vi, .release_mem = pm_release_mem_vi, .map_process_size = sizeof(struct pm4_mes_map_process), @@ -312,7 +312,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = { .set_resources_size = sizeof(struct pm4_mes_set_resources), .map_queues_size = sizeof(struct pm4_mes_map_queues), .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), - .set_grace_period_size = 0, + 
.config_dequeue_wait_counts_size = 0, .query_status_size = sizeof(struct pm4_mes_query_status), .release_mem_size = sizeof(struct pm4_mec_release_mem) }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index bb09c873a9a5..f6aedf69c644 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1389,6 +1389,24 @@ int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm, #define KFD_FENCE_COMPLETED (100) #define KFD_FENCE_INIT (10) +/** + * enum kfd_config_dequeue_wait_counts_cmd - Command for configuring + * dequeue wait counts. + * + * @KFD_DEQUEUE_WAIT_INIT: Set optimized dequeue wait counts for a + * certain ASICs. For these ASICs, this is default value used by RESET + * @KFD_DEQUEUE_WAIT_RESET: Reset dequeue wait counts to the optimized value + * for certain ASICs. For others set it to default hardware reset value + * @KFD_DEQUEUE_WAIT_SET_SCH_WAVE: Set context switch latency wait + * + */ +enum kfd_config_dequeue_wait_counts_cmd { + KFD_DEQUEUE_WAIT_INIT = 1, + KFD_DEQUEUE_WAIT_RESET = 2, + KFD_DEQUEUE_WAIT_SET_SCH_WAVE = 3 +}; + + struct packet_manager { struct device_queue_manager *dqm; struct kernel_queue *priv_queue; @@ -1414,8 +1432,8 @@ struct packet_manager_funcs { int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer, enum kfd_unmap_queues_filter mode, uint32_t filter_param, bool reset); - int (*set_grace_period)(struct packet_manager *pm, uint32_t *buffer, - uint32_t grace_period); + int (*config_dequeue_wait_counts)(struct packet_manager *pm, uint32_t *buffer, + enum kfd_config_dequeue_wait_counts_cmd cmd, uint32_t value); int (*query_status)(struct packet_manager *pm, uint32_t *buffer, uint64_t fence_address, uint64_t fence_value); int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer); @@ -1426,7 +1444,7 @@ struct packet_manager_funcs { int set_resources_size; int map_queues_size; int unmap_queues_size; - int set_grace_period_size; + int 
config_dequeue_wait_counts_size; int query_status_size; int release_mem_size; }; @@ -1449,7 +1467,9 @@ int pm_send_unmap_queue(struct packet_manager *pm, void pm_release_ib(struct packet_manager *pm); -int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period); +int pm_config_dequeue_wait_counts(struct packet_manager *pm, + enum kfd_config_dequeue_wait_counts_cmd cmd, + uint32_t wait_counts_config); /* Following PM funcs can be shared among VI and AI */ unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 1a38ac75abbd..100717a98ec1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1287,13 +1287,7 @@ svm_range_get_pte_flags(struct kfd_node *node, break; case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): - if (domain == SVM_RANGE_VRAM_DOMAIN) { - if (bo_node != node) - mapping_flags |= AMDGPU_VM_MTYPE_NC; - } else { - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; - } + mapping_flags |= AMDGPU_VM_MTYPE_NC; break; default: mapping_flags |= coherent ? 
@@ -3009,19 +3003,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, goto out; } - /* check if this page fault time stamp is before svms->checkpoint_ts */ - if (svms->checkpoint_ts[gpuidx] != 0) { - if (amdgpu_ih_ts_after_or_equal(ts, svms->checkpoint_ts[gpuidx])) { - pr_debug("draining retry fault, drop fault 0x%llx\n", addr); - r = 0; - goto out; - } else - /* ts is after svms->checkpoint_ts now, reset svms->checkpoint_ts - * to zero to avoid following ts wrap around give wrong comparing - */ - svms->checkpoint_ts[gpuidx] = 0; - } - if (!p->xnack_enabled) { pr_debug("XNACK not enabled for pasid 0x%x\n", pasid); r = -EFAULT; @@ -3041,6 +3022,21 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, mmap_read_lock(mm); retry_write_locked: mutex_lock(&svms->lock); + + /* check if this page fault time stamp is before svms->checkpoint_ts */ + if (svms->checkpoint_ts[gpuidx] != 0) { + if (amdgpu_ih_ts_after_or_equal(ts, svms->checkpoint_ts[gpuidx])) { + pr_debug("draining retry fault, drop fault 0x%llx\n", addr); + r = -EAGAIN; + goto out_unlock_svms; + } else { + /* ts is after svms->checkpoint_ts now, reset svms->checkpoint_ts + * to zero to avoid following ts wrap around give wrong comparing + */ + svms->checkpoint_ts[gpuidx] = 0; + } + } + prange = svm_range_from_addr(svms, addr, NULL); if (!prange) { pr_debug("failed to find prange svms 0x%p address [0x%llx]\n", @@ -3166,7 +3162,8 @@ out_unlock_svms: mutex_unlock(&svms->lock); mmap_read_unlock(mm); - svm_range_count_fault(node, p, gpuidx); + if (r != -EAGAIN) + svm_range_count_fault(node, p, gpuidx); mmput(mm); out: |
