author	Akhil P Oommen <quic_akhilpo@quicinc.com>	2024-10-27 23:35:47 +0530
committer	Rob Clark <robdclark@chromium.org>	2024-10-28 09:31:33 -0700
commit	d6d1ad32d00714ecf9f1996173c6f98e43c5b022 (patch)
tree	10658eb46ed1c339dbad9a1cefe0b01512446556 /drivers/gpu/drm
parent	8f32ddd87e499ba6d2dc74ce30b6932baf1e1fc3 (diff)
drm/msm/a6xx: Fix excessive stack usage
Clang-19 and above sometimes end up with multiple copies of the large
a6xx_hfi_msg_bw_table structure on the stack. The problem is that
a6xx_hfi_send_bw_table() calls a number of device specific functions to
fill the structure, but these create another copy of the structure on
the stack which gets copied to the first. If the functions get inlined,
that busts the warning limit:

drivers/gpu/drm/msm/adreno/a6xx_hfi.c:631:12: error: stack frame size (1032) exceeds limit (1024) in 'a6xx_hfi_send_bw_table' [-Werror,-Wframe-larger-than]

Fix this by kmalloc-ating struct a6xx_hfi_msg_bw_table instead of using
the stack. Also, use this opportunity to skip re-initializing this
table to optimize gpu wake up latency.

Cc: Arnd Bergmann <arnd@kernel.org>
Signed-off-by: Akhil P Oommen <quic_akhilpo@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/621814/
Signed-off-by: Rob Clark <robdclark@chromium.org>
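The mechanism described above is easy to reproduce in isolation. Below is a
hedged, self-contained C sketch, not the driver code: all names are made up,
and big_table merely stands in for the roughly 1 KiB
struct a6xx_hfi_msg_bw_table.

	/* Hypothetical reduction of the problem; not the driver code. */
	struct big_table {
		unsigned int data[250];		/* ~1 KiB payload */
	};

	static void build_table(struct big_table *out)
	{
		struct big_table tmp = { 0 };	/* second copy, on the callee's stack */

		tmp.data[0] = 1;
		*out = tmp;			/* copied into the caller's copy */
	}

	int send_table(void)
	{
		struct big_table msg = { 0 };	/* first copy, on the caller's stack */

		build_table(&msg);		/* once inlined, tmp and msg both live
						 * in send_table()'s frame: ~2 KiB,
						 * past the 1024-byte warning limit */
		return (int)msg.data[0];
	}

Whether the compiler elides the temporary copy depends on its inlining and
copy-elimination decisions, which is why the warning shows up only sometimes;
moving the table off the stack removes that dependence entirely.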
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r--	drivers/gpu/drm/msm/adreno/a6xx_gmu.h	1
-rw-r--r--	drivers/gpu/drm/msm/adreno/a6xx_hfi.c	36
2 files changed, 24 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
index 94b6c5cab6f4..b4a79f88ccf4 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
@@ -99,6 +99,7 @@ struct a6xx_gmu {
 	struct completion pd_gate;
 
 	struct qmp *qmp;
+	struct a6xx_hfi_msg_bw_table *bw_table;
 };
 
 static inline u32 gmu_read(struct a6xx_gmu *gmu, u32 offset)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
index f1196d66055c..cb8844ed46b2 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
@@ -661,34 +661,44 @@ static void a6xx_build_bw_table(struct a6xx_hfi_msg_bw_table *msg)
 
 static int a6xx_hfi_send_bw_table(struct a6xx_gmu *gmu)
 {
-	struct a6xx_hfi_msg_bw_table msg = { 0 };
+	struct a6xx_hfi_msg_bw_table *msg;
 	struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
 	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
 
+	if (gmu->bw_table)
+		goto send;
+
+	msg = devm_kzalloc(gmu->dev, sizeof(*msg), GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
 	if (adreno_is_a618(adreno_gpu))
-		a618_build_bw_table(&msg);
+		a618_build_bw_table(msg);
 	else if (adreno_is_a619(adreno_gpu))
-		a619_build_bw_table(&msg);
+		a619_build_bw_table(msg);
 	else if (adreno_is_a640_family(adreno_gpu))
-		a640_build_bw_table(&msg);
+		a640_build_bw_table(msg);
 	else if (adreno_is_a650(adreno_gpu))
-		a650_build_bw_table(&msg);
+		a650_build_bw_table(msg);
 	else if (adreno_is_7c3(adreno_gpu))
-		adreno_7c3_build_bw_table(&msg);
+		adreno_7c3_build_bw_table(msg);
 	else if (adreno_is_a660(adreno_gpu))
-		a660_build_bw_table(&msg);
+		a660_build_bw_table(msg);
 	else if (adreno_is_a663(adreno_gpu))
-		a663_build_bw_table(&msg);
+		a663_build_bw_table(msg);
 	else if (adreno_is_a690(adreno_gpu))
-		a690_build_bw_table(&msg);
+		a690_build_bw_table(msg);
 	else if (adreno_is_a730(adreno_gpu))
-		a730_build_bw_table(&msg);
+		a730_build_bw_table(msg);
 	else if (adreno_is_a740_family(adreno_gpu))
-		a740_build_bw_table(&msg);
+		a740_build_bw_table(msg);
 	else
-		a6xx_build_bw_table(&msg);
+		a6xx_build_bw_table(msg);
+
+	gmu->bw_table = msg;
 
-	return a6xx_hfi_send_msg(gmu, HFI_H2F_MSG_BW_TABLE, &msg, sizeof(msg),
+send:
+	return a6xx_hfi_send_msg(gmu, HFI_H2F_MSG_BW_TABLE, gmu->bw_table, sizeof(*(gmu->bw_table)),
 		NULL, 0);
 }
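A note on the allocation choice above: devm_kzalloc() ties the buffer's
lifetime to the GMU device, so caching the pointer in gmu->bw_table needs no
matching kfree(); the memory is released automatically when the device is
unbound. That is what makes the goto send fast path safe across GPU wakeups,
and it is where the wake-up latency saving mentioned in the commit message
comes from: every wakeup after the first skips the table rebuild and goes
straight to the HFI send.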