From 7e72fc41d4243800206ff76615cfebb15d632027 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 3 Jan 2024 12:49:57 +0100 Subject: thermal: netlink: Pass pointers to thermal_notify_tz_trip_change() Instead of requiring the caller of thermal_notify_tz_trip_change() to provide specific values needed to populate struct param in it, make it extract those values from objects passed to it by the caller via const pointers. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Lukasz Luba Reviewed-by: Daniel Lezcano --- drivers/thermal/thermal_netlink.c | 14 ++++++++------ drivers/thermal/thermal_netlink.h | 11 +++++++---- drivers/thermal/thermal_trip.c | 8 ++------ 3 files changed, 17 insertions(+), 16 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/thermal_netlink.c b/drivers/thermal/thermal_netlink.c index 332052e24a86..4a2c299c0462 100644 --- a/drivers/thermal/thermal_netlink.c +++ b/drivers/thermal/thermal_netlink.c @@ -377,12 +377,14 @@ int thermal_notify_tz_trip_delete(int tz_id, int trip_id) return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_TRIP_DELETE, &p); } -int thermal_notify_tz_trip_change(int tz_id, int trip_id, int trip_type, - int trip_temp, int trip_hyst) -{ - struct param p = { .tz_id = tz_id, .trip_id = trip_id, - .trip_type = trip_type, .trip_temp = trip_temp, - .trip_hyst = trip_hyst }; +int thermal_notify_tz_trip_change(const struct thermal_zone_device *tz, + const struct thermal_trip *trip) +{ + struct param p = { .tz_id = tz->id, + .trip_id = thermal_zone_trip_id(tz, trip), + .trip_type = trip->type, + .trip_temp = trip->temperature, + .trip_hyst = trip->hysteresis }; return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_TRIP_CHANGE, &p); } diff --git a/drivers/thermal/thermal_netlink.h b/drivers/thermal/thermal_netlink.h index 0a9987c3bc57..bb4c47d685e3 100644 --- a/drivers/thermal/thermal_netlink.h +++ b/drivers/thermal/thermal_netlink.h @@ -10,6 +10,9 @@ struct thermal_genl_cpu_caps { int efficiency; }; +struct thermal_zone_device; +struct thermal_trip; + /* Netlink notification function */ #ifdef CONFIG_THERMAL_NETLINK int __init thermal_netlink_init(void); @@ -23,8 +26,8 @@ int thermal_notify_tz_trip_up(int tz_id, int id, int temp); int thermal_notify_tz_trip_delete(int tz_id, int id); int thermal_notify_tz_trip_add(int tz_id, int id, int type, int temp, int hyst); -int thermal_notify_tz_trip_change(int tz_id, int id, int type, - int temp, int hyst); +int thermal_notify_tz_trip_change(const struct thermal_zone_device *tz, + const struct thermal_trip *trip); int thermal_notify_cdev_state_update(int cdev_id, int state); int thermal_notify_cdev_add(int cdev_id, const char *name, int max_state); int thermal_notify_cdev_delete(int cdev_id); @@ -79,8 +82,8 @@ static inline int thermal_notify_tz_trip_add(int tz_id, int id, int type, return 0; } -static inline int thermal_notify_tz_trip_change(int tz_id, int id, int type, - int temp, int hyst) +static inline int thermal_notify_tz_trip_change(const struct thermal_zone_device *tz, + const struct thermal_trip *trip) { return 0; } diff --git a/drivers/thermal/thermal_trip.c b/drivers/thermal/thermal_trip.c index 8bffa1e5e206..c875a26d5adf 100644 --- a/drivers/thermal/thermal_trip.c +++ b/drivers/thermal/thermal_trip.c @@ -155,9 +155,7 @@ int thermal_zone_trip_id(const struct thermal_zone_device *tz, void thermal_zone_trip_updated(struct thermal_zone_device *tz, const struct thermal_trip *trip) { - thermal_notify_tz_trip_change(tz->id, thermal_zone_trip_id(tz, trip), - trip->type, trip->temperature, - trip->hysteresis); + thermal_notify_tz_trip_change(tz, trip); __thermal_zone_device_update(tz, THERMAL_TRIP_CHANGED); } @@ -168,8 +166,6 @@ void thermal_zone_set_trip_temp(struct thermal_zone_device *tz, return; trip->temperature = temp; - thermal_notify_tz_trip_change(tz->id, thermal_zone_trip_id(tz, trip), - trip->type, trip->temperature, - trip->hysteresis); + thermal_notify_tz_trip_change(tz, trip); } EXPORT_SYMBOL_GPL(thermal_zone_set_trip_temp); -- cgit v1.2.3 From f52557edf0648b471e2006f9377ea0ba4f73f9b2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 15 Dec 2023 20:57:50 +0100 Subject: thermal: netlink: Pass pointers to thermal_notify_tz_trip_up/down() Instead of requiring the callers of thermal_notify_tz_trip_up/down() to provide specific values needed to populate struct param in them, make them extract those values from objects passed by the callers via const pointers. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Lukasz Luba Reviewed-by: Daniel Lezcano --- drivers/thermal/thermal_core.c | 8 ++------ drivers/thermal/thermal_netlink.c | 14 ++++++++++---- drivers/thermal/thermal_netlink.h | 12 ++++++++---- 3 files changed, 20 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index fa88d8707241..a7e2008d34c9 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -381,9 +381,7 @@ static void handle_thermal_trip(struct thermal_zone_device *tz, * the threshold and the trip temperature will be equal. */ if (tz->temperature >= trip->temperature) { - thermal_notify_tz_trip_up(tz->id, - thermal_zone_trip_id(tz, trip), - tz->temperature); + thermal_notify_tz_trip_up(tz, trip); trip->threshold = trip->temperature - trip->hysteresis; } else { trip->threshold = trip->temperature; @@ -400,9 +398,7 @@ static void handle_thermal_trip(struct thermal_zone_device *tz, * the trip. */ if (tz->temperature < trip->temperature - trip->hysteresis) { - thermal_notify_tz_trip_down(tz->id, - thermal_zone_trip_id(tz, trip), - tz->temperature); + thermal_notify_tz_trip_down(tz, trip); trip->threshold = trip->temperature; } else { trip->threshold = trip->temperature - trip->hysteresis; diff --git a/drivers/thermal/thermal_netlink.c b/drivers/thermal/thermal_netlink.c index 4a2c299c0462..46e4ea45d67d 100644 --- a/drivers/thermal/thermal_netlink.c +++ b/drivers/thermal/thermal_netlink.c @@ -346,16 +346,22 @@ int thermal_notify_tz_disable(int tz_id) return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_DISABLE, &p); } -int thermal_notify_tz_trip_down(int tz_id, int trip_id, int temp) +int thermal_notify_tz_trip_down(const struct thermal_zone_device *tz, + const struct thermal_trip *trip) { - struct param p = { .tz_id = tz_id, .trip_id = trip_id, .temp = temp }; + struct param p = { .tz_id = tz->id, + .trip_id = thermal_zone_trip_id(tz, trip), + .temp = tz->temperature }; return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_TRIP_DOWN, &p); } -int thermal_notify_tz_trip_up(int tz_id, int trip_id, int temp) +int thermal_notify_tz_trip_up(const struct thermal_zone_device *tz, + const struct thermal_trip *trip) { - struct param p = { .tz_id = tz_id, .trip_id = trip_id, .temp = temp }; + struct param p = { .tz_id = tz->id, + .trip_id = thermal_zone_trip_id(tz, trip), + .temp = tz->temperature }; return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_TRIP_UP, &p); } diff --git a/drivers/thermal/thermal_netlink.h b/drivers/thermal/thermal_netlink.h index bb4c47d685e3..907dc88d3317 100644 --- a/drivers/thermal/thermal_netlink.h +++ b/drivers/thermal/thermal_netlink.h @@ -21,8 +21,10 @@ int thermal_notify_tz_create(int tz_id, const char *name); int thermal_notify_tz_delete(int tz_id); int thermal_notify_tz_enable(int tz_id); int thermal_notify_tz_disable(int tz_id); -int thermal_notify_tz_trip_down(int tz_id, int id, int temp); -int thermal_notify_tz_trip_up(int tz_id, int id, int temp); +int thermal_notify_tz_trip_down(const struct thermal_zone_device *tz, + const struct thermal_trip *trip); +int thermal_notify_tz_trip_up(const struct thermal_zone_device *tz, + const struct thermal_trip *trip); int thermal_notify_tz_trip_delete(int tz_id, int id); int thermal_notify_tz_trip_add(int tz_id, int id, int type, int temp, int hyst); @@ -61,12 +63,14 @@ static inline int thermal_notify_tz_disable(int tz_id) return 0; } -static inline int thermal_notify_tz_trip_down(int tz_id, int id, int temp) +static inline int thermal_notify_tz_trip_down(const struct thermal_zone_device *tz, + const struct thermal_trip *trip) { return 0; } -static inline int thermal_notify_tz_trip_up(int tz_id, int id, int temp) +static inline int thermal_notify_tz_trip_up(const struct thermal_zone_device *tz, + const struct thermal_trip *trip) { return 0; } -- cgit v1.2.3 From 4ae535f37d0e3c5731f90c385e883c0bada59fc9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 15 Dec 2023 20:59:08 +0100 Subject: thermal: netlink: Drop thermal_notify_tz_trip_add/delete() Because thermal_notify_tz_trip_add/delete() are never used, drop them entirely along with the related code. The addition or removal of trip points is not supported by the thermal core and is unlikely to be supported in the future, so it is also unlikely that these functions will ever be needed. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Lukasz Luba Acked-by: Daniel Lezcano --- drivers/thermal/thermal_netlink.c | 33 +-------------------------------- drivers/thermal/thermal_netlink.h | 14 -------------- 2 files changed, 1 insertion(+), 46 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/thermal_netlink.c b/drivers/thermal/thermal_netlink.c index 46e4ea45d67d..91992181e845 100644 --- a/drivers/thermal/thermal_netlink.c +++ b/drivers/thermal/thermal_netlink.c @@ -148,7 +148,7 @@ static int thermal_genl_event_tz_trip_up(struct param *p) return 0; } -static int thermal_genl_event_tz_trip_add(struct param *p) +static int thermal_genl_event_tz_trip_change(struct param *p) { if (nla_put_u32(p->msg, THERMAL_GENL_ATTR_TZ_ID, p->tz_id) || nla_put_u32(p->msg, THERMAL_GENL_ATTR_TZ_TRIP_ID, p->trip_id) || @@ -160,15 +160,6 @@ static int thermal_genl_event_tz_trip_add(struct param *p) return 0; } -static int thermal_genl_event_tz_trip_delete(struct param *p) -{ - if (nla_put_u32(p->msg, THERMAL_GENL_ATTR_TZ_ID, p->tz_id) || - nla_put_u32(p->msg, THERMAL_GENL_ATTR_TZ_TRIP_ID, p->trip_id)) - return -EMSGSIZE; - - return 0; -} - static int thermal_genl_event_cdev_add(struct param *p) { if (nla_put_string(p->msg, THERMAL_GENL_ATTR_CDEV_NAME, @@ -258,9 +249,6 @@ int thermal_genl_event_tz_disable(struct param *p) int thermal_genl_event_tz_trip_down(struct param *p) __attribute__((alias("thermal_genl_event_tz_trip_up"))); -int thermal_genl_event_tz_trip_change(struct param *p) - __attribute__((alias("thermal_genl_event_tz_trip_add"))); - static cb_t event_cb[] = { [THERMAL_GENL_EVENT_TZ_CREATE] = thermal_genl_event_tz_create, [THERMAL_GENL_EVENT_TZ_DELETE] = thermal_genl_event_tz_delete, @@ -269,8 +257,6 @@ static cb_t event_cb[] = { [THERMAL_GENL_EVENT_TZ_TRIP_UP] = thermal_genl_event_tz_trip_up, [THERMAL_GENL_EVENT_TZ_TRIP_DOWN] = thermal_genl_event_tz_trip_down, [THERMAL_GENL_EVENT_TZ_TRIP_CHANGE] = thermal_genl_event_tz_trip_change, - [THERMAL_GENL_EVENT_TZ_TRIP_ADD] = thermal_genl_event_tz_trip_add, - [THERMAL_GENL_EVENT_TZ_TRIP_DELETE] = thermal_genl_event_tz_trip_delete, [THERMAL_GENL_EVENT_CDEV_ADD] = thermal_genl_event_cdev_add, [THERMAL_GENL_EVENT_CDEV_DELETE] = thermal_genl_event_cdev_delete, [THERMAL_GENL_EVENT_CDEV_STATE_UPDATE] = thermal_genl_event_cdev_state_update, @@ -366,23 +352,6 @@ int thermal_notify_tz_trip_up(const struct thermal_zone_device *tz, return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_TRIP_UP, &p); } -int thermal_notify_tz_trip_add(int tz_id, int trip_id, int trip_type, - int trip_temp, int trip_hyst) -{ - struct param p = { .tz_id = tz_id, .trip_id = trip_id, - .trip_type = trip_type, .trip_temp = trip_temp, - .trip_hyst = trip_hyst }; - - return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_TRIP_ADD, &p); -} - -int thermal_notify_tz_trip_delete(int tz_id, int trip_id) -{ - struct param p = { .tz_id = tz_id, .trip_id = trip_id }; - - return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_TRIP_DELETE, &p); -} - int thermal_notify_tz_trip_change(const struct thermal_zone_device *tz, const struct thermal_trip *trip) { diff --git a/drivers/thermal/thermal_netlink.h b/drivers/thermal/thermal_netlink.h index 907dc88d3317..e780ce3f7db6 100644 --- a/drivers/thermal/thermal_netlink.h +++ b/drivers/thermal/thermal_netlink.h @@ -25,9 +25,6 @@ int thermal_notify_tz_trip_down(const struct thermal_zone_device *tz, const struct thermal_trip *trip); int thermal_notify_tz_trip_up(const struct thermal_zone_device *tz, const struct thermal_trip *trip); -int thermal_notify_tz_trip_delete(int tz_id, int id); -int thermal_notify_tz_trip_add(int tz_id, int id, int type, - int temp, int hyst); int thermal_notify_tz_trip_change(const struct thermal_zone_device *tz, const struct thermal_trip *trip); int thermal_notify_cdev_state_update(int cdev_id, int state); @@ -75,17 +72,6 @@ static inline int thermal_notify_tz_trip_up(const struct thermal_zone_device *tz return 0; } -static inline int thermal_notify_tz_trip_delete(int tz_id, int id) -{ - return 0; -} - -static inline int thermal_notify_tz_trip_add(int tz_id, int id, int type, - int temp, int hyst) -{ - return 0; -} - static inline int thermal_notify_tz_trip_change(const struct thermal_zone_device *tz, const struct thermal_trip *trip) { -- cgit v1.2.3 From 2f521890aa5b8a5619d31a95caec0b08d4cb9e1a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 3 Jan 2024 12:59:10 +0100 Subject: thermal: netlink: Pass thermal zone pointer to notify routines There are several rountines in the thermal netlink API that take a thermal zone ID or a thermal zone type as their arguments, but from their callers perspective it would be more convenient to pass a thermal zone pointer to them and let them extract the necessary data from the given thermal zone object by themselves. Modify the code accordingly. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Lukasz Luba Acked-by: Daniel Lezcano --- drivers/thermal/thermal_core.c | 13 +++++-------- drivers/thermal/thermal_netlink.c | 21 +++++++++++---------- drivers/thermal/thermal_netlink.h | 22 ++++++++++++---------- 3 files changed, 28 insertions(+), 28 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index a7e2008d34c9..c0b012e075f3 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -211,7 +211,7 @@ exit: mutex_unlock(&tz->lock); mutex_unlock(&thermal_governor_lock); - thermal_notify_tz_gov_change(tz->id, policy); + thermal_notify_tz_gov_change(tz, policy); return ret; } @@ -501,9 +501,9 @@ static int thermal_zone_device_set_mode(struct thermal_zone_device *tz, mutex_unlock(&tz->lock); if (mode == THERMAL_DEVICE_ENABLED) - thermal_notify_tz_enable(tz->id); + thermal_notify_tz_enable(tz); else - thermal_notify_tz_disable(tz->id); + thermal_notify_tz_disable(tz); return ret; } @@ -1407,7 +1407,7 @@ thermal_zone_device_register_with_trips(const char *type, struct thermal_trip *t if (atomic_cmpxchg(&tz->need_update, 1, 0)) thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); - thermal_notify_tz_create(tz->id, tz->type); + thermal_notify_tz_create(tz); return tz; @@ -1466,15 +1466,12 @@ EXPORT_SYMBOL_GPL(thermal_zone_device); */ void thermal_zone_device_unregister(struct thermal_zone_device *tz) { - int tz_id; struct thermal_cooling_device *cdev; struct thermal_zone_device *pos = NULL; if (!tz) return; - tz_id = tz->id; - mutex_lock(&thermal_list_lock); list_for_each_entry(pos, &thermal_tz_list, node) if (pos == tz) @@ -1510,7 +1507,7 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz) put_device(&tz->device); - thermal_notify_tz_delete(tz_id); + thermal_notify_tz_delete(tz); wait_for_completion(&tz->removal); kfree(tz); diff --git a/drivers/thermal/thermal_netlink.c b/drivers/thermal/thermal_netlink.c index 91992181e845..46b0156a6319 100644 --- a/drivers/thermal/thermal_netlink.c +++ b/drivers/thermal/thermal_netlink.c @@ -304,30 +304,30 @@ out_free_msg: return ret; } -int thermal_notify_tz_create(int tz_id, const char *name) +int thermal_notify_tz_create(const struct thermal_zone_device *tz) { - struct param p = { .tz_id = tz_id, .name = name }; + struct param p = { .tz_id = tz->id, .name = tz->type }; return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_CREATE, &p); } -int thermal_notify_tz_delete(int tz_id) +int thermal_notify_tz_delete(const struct thermal_zone_device *tz) { - struct param p = { .tz_id = tz_id }; + struct param p = { .tz_id = tz->id }; return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_DELETE, &p); } -int thermal_notify_tz_enable(int tz_id) +int thermal_notify_tz_enable(const struct thermal_zone_device *tz) { - struct param p = { .tz_id = tz_id }; + struct param p = { .tz_id = tz->id }; return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_ENABLE, &p); } -int thermal_notify_tz_disable(int tz_id) +int thermal_notify_tz_disable(const struct thermal_zone_device *tz) { - struct param p = { .tz_id = tz_id }; + struct param p = { .tz_id = tz->id }; return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_DISABLE, &p); } @@ -386,9 +386,10 @@ int thermal_notify_cdev_delete(int cdev_id) return thermal_genl_send_event(THERMAL_GENL_EVENT_CDEV_DELETE, &p); } -int thermal_notify_tz_gov_change(int tz_id, const char *name) +int thermal_notify_tz_gov_change(const struct thermal_zone_device *tz, + const char *name) { - struct param p = { .tz_id = tz_id, .name = name }; + struct param p = { .tz_id = tz->id, .name = name }; return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_GOV_CHANGE, &p); } diff --git a/drivers/thermal/thermal_netlink.h b/drivers/thermal/thermal_netlink.h index e780ce3f7db6..13ea2ad23d3a 100644 --- a/drivers/thermal/thermal_netlink.h +++ b/drivers/thermal/thermal_netlink.h @@ -17,10 +17,10 @@ struct thermal_trip; #ifdef CONFIG_THERMAL_NETLINK int __init thermal_netlink_init(void); void __init thermal_netlink_exit(void); -int thermal_notify_tz_create(int tz_id, const char *name); -int thermal_notify_tz_delete(int tz_id); -int thermal_notify_tz_enable(int tz_id); -int thermal_notify_tz_disable(int tz_id); +int thermal_notify_tz_create(const struct thermal_zone_device *tz); +int thermal_notify_tz_delete(const struct thermal_zone_device *tz); +int thermal_notify_tz_enable(const struct thermal_zone_device *tz); +int thermal_notify_tz_disable(const struct thermal_zone_device *tz); int thermal_notify_tz_trip_down(const struct thermal_zone_device *tz, const struct thermal_trip *trip); int thermal_notify_tz_trip_up(const struct thermal_zone_device *tz, @@ -30,7 +30,8 @@ int thermal_notify_tz_trip_change(const struct thermal_zone_device *tz, int thermal_notify_cdev_state_update(int cdev_id, int state); int thermal_notify_cdev_add(int cdev_id, const char *name, int max_state); int thermal_notify_cdev_delete(int cdev_id); -int thermal_notify_tz_gov_change(int tz_id, const char *name); +int thermal_notify_tz_gov_change(const struct thermal_zone_device *tz, + const char *name); int thermal_genl_sampling_temp(int id, int temp); int thermal_genl_cpu_capability_event(int count, struct thermal_genl_cpu_caps *caps); @@ -40,22 +41,22 @@ static inline int thermal_netlink_init(void) return 0; } -static inline int thermal_notify_tz_create(int tz_id, const char *name) +static inline int thermal_notify_tz_create(const struct thermal_zone_device *tz) { return 0; } -static inline int thermal_notify_tz_delete(int tz_id) +static inline int thermal_notify_tz_delete(const struct thermal_zone_device *tz) { return 0; } -static inline int thermal_notify_tz_enable(int tz_id) +static inline int thermal_notify_tz_enable(const struct thermal_zone_device *tz) { return 0; } -static inline int thermal_notify_tz_disable(int tz_id) +static inline int thermal_notify_tz_disable(const struct thermal_zone_device *tz) { return 0; } @@ -94,7 +95,8 @@ static inline int thermal_notify_cdev_delete(int cdev_id) return 0; } -static inline int thermal_notify_tz_gov_change(int tz_id, const char *name) +static inline int thermal_notify_tz_gov_change(const struct thermal_zone_device *tz, + const char *name) { return 0; } -- cgit v1.2.3 From 755113d7678681a137c330f7997ceb680adb644e Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 9 Jan 2024 10:41:11 +0100 Subject: thermal/debugfs: Add thermal cooling device debugfs information The thermal framework does not have any debug information except a sysfs stat which is a bit controversial. This one allocates big chunks of memory for every cooling devices with a high number of states and could represent on some systems in production several megabytes of memory for just a portion of it. As the sysfs is limited to a page size, the output is not exploitable with large data array and gets truncated. The patch provides the same information than sysfs except the transitions are dynamically allocated, thus they won't show more events than the ones which actually occurred. There is no longer a size limitation and it opens the field for more debugging information where the debugfs is designed for, not sysfs. The thermal debugfs directory structure tries to stay consistent with the sysfs one but in a very simplified way: thermal/ -- cooling_devices |-- 0 | |-- clear | |-- time_in_state_ms | |-- total_trans | `-- trans_table |-- 1 | |-- clear | |-- time_in_state_ms | |-- total_trans | `-- trans_table |-- 2 | |-- clear | |-- time_in_state_ms | |-- total_trans | `-- trans_table |-- 3 | |-- clear | |-- time_in_state_ms | |-- total_trans | `-- trans_table `-- 4 |-- clear |-- time_in_state_ms |-- total_trans `-- trans_table The content of the files in the cooling devices directory is the same as the sysfs one except for the trans_table which has the following format: Transition Hits 1->0 246 0->1 246 2->1 632 1->2 632 3->2 98 2->3 98 Signed-off-by: Daniel Lezcano [ rjw: White space fixups, rebase ] Signed-off-by: Rafael J. Wysocki --- drivers/thermal/Kconfig | 7 + drivers/thermal/Makefile | 2 + drivers/thermal/thermal_core.c | 6 + drivers/thermal/thermal_core.h | 1 + drivers/thermal/thermal_debugfs.c | 446 ++++++++++++++++++++++++++++++++++++++ drivers/thermal/thermal_debugfs.h | 14 ++ drivers/thermal/thermal_helpers.c | 20 +- include/linux/thermal.h | 7 + 8 files changed, 497 insertions(+), 6 deletions(-) create mode 100644 drivers/thermal/thermal_debugfs.c create mode 100644 drivers/thermal/thermal_debugfs.h (limited to 'drivers') diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index c81a00fbca7d..3ff7add3fb7c 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -33,6 +33,13 @@ config THERMAL_STATISTICS If in doubt, say N. +config THERMAL_DEBUGFS + bool "Thermal subsystem debug support" + depends on DEBUG_FS + help + Say Y to allow the thermal subsystem to collect diagnostic + information that can be accessed via debugfs. + config THERMAL_EMERGENCY_POWEROFF_DELAY_MS int "Emergency poweroff delay in milli-seconds" default 0 diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index c934cab309ae..0f65ae86a9c6 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -10,6 +10,8 @@ thermal_sys-y += thermal_trip.o thermal_helpers.o # netlink interface to manage the thermal framework thermal_sys-$(CONFIG_THERMAL_NETLINK) += thermal_netlink.o +thermal_sys-$(CONFIG_THERMAL_DEBUGFS) += thermal_debugfs.o + # interface to/from other layers providing sensors thermal_sys-$(CONFIG_THERMAL_HWMON) += thermal_hwmon.o thermal_sys-$(CONFIG_THERMAL_OF) += thermal_of.o diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index c0b012e075f3..5d9cc422d7ba 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -960,6 +960,8 @@ __thermal_cooling_device_register(struct device_node *np, mutex_unlock(&thermal_list_lock); + thermal_debug_cdev_add(cdev); + return cdev; out_cooling_dev: @@ -1166,6 +1168,8 @@ void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev) if (!cdev) return; + thermal_debug_cdev_remove(cdev); + mutex_lock(&thermal_list_lock); if (!thermal_cooling_device_present(cdev)) { @@ -1629,6 +1633,8 @@ static int __init thermal_init(void) { int result; + thermal_debug_init(); + result = thermal_netlink_init(); if (result) goto error; diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index 4e023d54fd27..e9c099ecdd0f 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -13,6 +13,7 @@ #include #include "thermal_netlink.h" +#include "thermal_debugfs.h" /* Default Thermal Governor */ #if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE) diff --git a/drivers/thermal/thermal_debugfs.c b/drivers/thermal/thermal_debugfs.c new file mode 100644 index 000000000000..b18bdde47847 --- /dev/null +++ b/drivers/thermal/thermal_debugfs.c @@ -0,0 +1,446 @@ + +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2023 Linaro Limited + * + * Author: Daniel Lezcano + * + * Thermal subsystem debug support + */ +#include +#include +#include +#include +#include +#include + +static struct dentry *d_root; +static struct dentry *d_cdev; + +/* + * Length of the string containing the thermal zone id or the cooling + * device id, including the ending nul character. We can reasonably + * assume there won't be more than 256 thermal zones as the maximum + * observed today is around 32. + */ +#define IDSLENGTH 4 + +/* + * The cooling device transition list is stored in a hash table where + * the size is CDEVSTATS_HASH_SIZE. The majority of cooling devices + * have dozen of states but some can have much more, so a hash table + * is more adequate in this case, because the cost of browsing the entire + * list when storing the transitions may not be negligible. + */ +#define CDEVSTATS_HASH_SIZE 16 + +/** + * struct cdev_debugfs - per cooling device statistics structure + * A cooling device can have a high number of states. Showing the + * transitions on a matrix based representation can be overkill given + * most of the transitions won't happen and we end up with a matrix + * filled with zero. Instead, we show the transitions which actually + * happened. + * + * Every transition updates the current_state and the timestamp. The + * transitions and the durations are stored in lists. + * + * @total: the number of transitions for this cooling device + * @current_state: the current cooling device state + * @timestamp: the state change timestamp + * @transitions: an array of lists containing the state transitions + * @durations: an array of lists containing the residencies of each state + */ +struct cdev_debugfs { + u32 total; + int current_state; + ktime_t timestamp; + struct list_head transitions[CDEVSTATS_HASH_SIZE]; + struct list_head durations[CDEVSTATS_HASH_SIZE]; +}; + +/** + * struct cdev_value - Common structure for cooling device entry + * + * The following common structure allows to store the information + * related to the transitions and to the state residencies. They are + * identified with a id which is associated to a value. It is used as + * nodes for the "transitions" and "durations" above. + * + * @node: node to insert the structure in a list + * @id: identifier of the value which can be a state or a transition + * @residency: a ktime_t representing a state residency duration + * @count: a number of occurrences + */ +struct cdev_record { + struct list_head node; + int id; + union { + ktime_t residency; + u64 count; + }; +}; + +/** + * struct thermal_debugfs - High level structure for a thermal object in debugfs + * + * The thermal_debugfs structure is the common structure used by the + * cooling device to compute the statistics. + * + * @d_top: top directory of the thermal object directory + * @lock: per object lock to protect the internals + * + * @cdev: a cooling device debug structure + */ +struct thermal_debugfs { + struct dentry *d_top; + struct mutex lock; + union { + struct cdev_debugfs cdev_dbg; + }; +}; + +void thermal_debug_init(void) +{ + d_root = debugfs_create_dir("thermal", NULL); + if (!d_root) + return; + + d_cdev = debugfs_create_dir("cooling_devices", d_root); +} + +static struct thermal_debugfs *thermal_debugfs_add_id(struct dentry *d, int id) +{ + struct thermal_debugfs *thermal_dbg; + char ids[IDSLENGTH]; + + thermal_dbg = kzalloc(sizeof(*thermal_dbg), GFP_KERNEL); + if (!thermal_dbg) + return NULL; + + mutex_init(&thermal_dbg->lock); + + snprintf(ids, IDSLENGTH, "%d", id); + + thermal_dbg->d_top = debugfs_create_dir(ids, d); + if (!thermal_dbg->d_top) { + kfree(thermal_dbg); + return NULL; + } + + return thermal_dbg; +} + +static void thermal_debugfs_remove_id(struct thermal_debugfs *thermal_dbg) +{ + if (!thermal_dbg) + return; + + debugfs_remove(thermal_dbg->d_top); + + kfree(thermal_dbg); +} + +static struct cdev_record * +thermal_debugfs_cdev_record_alloc(struct thermal_debugfs *thermal_dbg, + struct list_head *lists, int id) +{ + struct cdev_record *cdev_record; + + cdev_record = kzalloc(sizeof(*cdev_record), GFP_KERNEL); + if (!cdev_record) + return NULL; + + cdev_record->id = id; + INIT_LIST_HEAD(&cdev_record->node); + list_add_tail(&cdev_record->node, + &lists[cdev_record->id % CDEVSTATS_HASH_SIZE]); + + return cdev_record; +} + +static struct cdev_record * +thermal_debugfs_cdev_record_find(struct thermal_debugfs *thermal_dbg, + struct list_head *lists, int id) +{ + struct cdev_record *entry; + + list_for_each_entry(entry, &lists[id % CDEVSTATS_HASH_SIZE], node) + if (entry->id == id) + return entry; + + return NULL; +} + +static struct cdev_record * +thermal_debugfs_cdev_record_get(struct thermal_debugfs *thermal_dbg, + struct list_head *lists, int id) +{ + struct cdev_record *cdev_record; + + cdev_record = thermal_debugfs_cdev_record_find(thermal_dbg, lists, id); + if (cdev_record) + return cdev_record; + + return thermal_debugfs_cdev_record_alloc(thermal_dbg, lists, id); +} + +static void thermal_debugfs_cdev_clear(struct cdev_debugfs *cdev_dbg) +{ + int i; + struct cdev_record *entry, *tmp; + + for (i = 0; i < CDEVSTATS_HASH_SIZE; i++) { + + list_for_each_entry_safe(entry, tmp, + &cdev_dbg->transitions[i], node) { + list_del(&entry->node); + kfree(entry); + } + + list_for_each_entry_safe(entry, tmp, + &cdev_dbg->durations[i], node) { + list_del(&entry->node); + kfree(entry); + } + } + + cdev_dbg->total = 0; +} + +static void *cdev_seq_start(struct seq_file *s, loff_t *pos) +{ + struct thermal_debugfs *thermal_dbg = s->private; + + mutex_lock(&thermal_dbg->lock); + + return (*pos < CDEVSTATS_HASH_SIZE) ? pos : NULL; +} + +static void *cdev_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + (*pos)++; + + return (*pos < CDEVSTATS_HASH_SIZE) ? pos : NULL; +} + +static void cdev_seq_stop(struct seq_file *s, void *v) +{ + struct thermal_debugfs *thermal_dbg = s->private; + + mutex_unlock(&thermal_dbg->lock); +} + +static int cdev_tt_seq_show(struct seq_file *s, void *v) +{ + struct thermal_debugfs *thermal_dbg = s->private; + struct cdev_debugfs *cdev_dbg = &thermal_dbg->cdev_dbg; + struct list_head *transitions = cdev_dbg->transitions; + struct cdev_record *entry; + int i = *(loff_t *)v; + + if (!i) + seq_puts(s, "Transition\tOccurences\n"); + + list_for_each_entry(entry, &transitions[i], node) { + /* + * Assuming maximum cdev states is 1024, the longer + * string for a transition would be "1024->1024\0" + */ + char buffer[11]; + + snprintf(buffer, ARRAY_SIZE(buffer), "%d->%d", + entry->id >> 16, entry->id & 0xFFFF); + + seq_printf(s, "%-10s\t%-10llu\n", buffer, entry->count); + } + + return 0; +} + +static const struct seq_operations tt_sops = { + .start = cdev_seq_start, + .next = cdev_seq_next, + .stop = cdev_seq_stop, + .show = cdev_tt_seq_show, +}; + +DEFINE_SEQ_ATTRIBUTE(tt); + +static int cdev_dt_seq_show(struct seq_file *s, void *v) +{ + struct thermal_debugfs *thermal_dbg = s->private; + struct cdev_debugfs *cdev_dbg = &thermal_dbg->cdev_dbg; + struct list_head *durations = cdev_dbg->durations; + struct cdev_record *entry; + int i = *(loff_t *)v; + + if (!i) + seq_puts(s, "State\tResidency\n"); + + list_for_each_entry(entry, &durations[i], node) { + s64 duration = ktime_to_ms(entry->residency); + + if (entry->id == cdev_dbg->current_state) + duration += ktime_ms_delta(ktime_get(), + cdev_dbg->timestamp); + + seq_printf(s, "%-5d\t%-10llu\n", entry->id, duration); + } + + return 0; +} + +static const struct seq_operations dt_sops = { + .start = cdev_seq_start, + .next = cdev_seq_next, + .stop = cdev_seq_stop, + .show = cdev_dt_seq_show, +}; + +DEFINE_SEQ_ATTRIBUTE(dt); + +static int cdev_clear_set(void *data, u64 val) +{ + struct thermal_debugfs *thermal_dbg = data; + + if (!val) + return -EINVAL; + + mutex_lock(&thermal_dbg->lock); + + thermal_debugfs_cdev_clear(&thermal_dbg->cdev_dbg); + + mutex_unlock(&thermal_dbg->lock); + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(cdev_clear_fops, NULL, cdev_clear_set, "%llu\n"); + +/** + * thermal_debug_cdev_state_update - Update a cooling device state change + * + * Computes a transition and the duration of the previous state residency. + * + * @cdev : a pointer to a cooling device + * @new_state: an integer corresponding to the new cooling device state + */ +void thermal_debug_cdev_state_update(const struct thermal_cooling_device *cdev, + int new_state) +{ + struct thermal_debugfs *thermal_dbg = cdev->debugfs; + struct cdev_debugfs *cdev_dbg; + struct cdev_record *cdev_record; + int transition, old_state; + + if (!thermal_dbg || (thermal_dbg->cdev_dbg.current_state == new_state)) + return; + + mutex_lock(&thermal_dbg->lock); + + cdev_dbg = &thermal_dbg->cdev_dbg; + + old_state = cdev_dbg->current_state; + + /* + * Get the old state information in the durations list. If + * this one does not exist, a new allocated one will be + * returned. Recompute the total duration in the old state and + * get a new timestamp for the new state. + */ + cdev_record = thermal_debugfs_cdev_record_get(thermal_dbg, + cdev_dbg->durations, + old_state); + if (cdev_record) { + ktime_t now = ktime_get(); + ktime_t delta = ktime_sub(now, cdev_dbg->timestamp); + cdev_record->residency = ktime_add(cdev_record->residency, delta); + cdev_dbg->timestamp = now; + } + + cdev_dbg->current_state = new_state; + transition = (old_state << 16) | new_state; + + /* + * Get the transition in the transitions list. If this one + * does not exist, a new allocated one will be returned. + * Increment the occurrence of this transition which is stored + * in the value field. + */ + cdev_record = thermal_debugfs_cdev_record_get(thermal_dbg, + cdev_dbg->transitions, + transition); + if (cdev_record) + cdev_record->count++; + + cdev_dbg->total++; + + mutex_unlock(&thermal_dbg->lock); +} + +/** + * thermal_debug_cdev_add - Add a cooling device debugfs entry + * + * Allocates a cooling device object for debug, initializes the + * statistics and create the entries in sysfs. + * @cdev: a pointer to a cooling device + */ +void thermal_debug_cdev_add(struct thermal_cooling_device *cdev) +{ + struct thermal_debugfs *thermal_dbg; + struct cdev_debugfs *cdev_dbg; + int i; + + thermal_dbg = thermal_debugfs_add_id(d_cdev, cdev->id); + if (!thermal_dbg) + return; + + cdev_dbg = &thermal_dbg->cdev_dbg; + + for (i = 0; i < CDEVSTATS_HASH_SIZE; i++) { + INIT_LIST_HEAD(&cdev_dbg->transitions[i]); + INIT_LIST_HEAD(&cdev_dbg->durations[i]); + } + + cdev_dbg->current_state = 0; + cdev_dbg->timestamp = ktime_get(); + + debugfs_create_file("trans_table", 0400, thermal_dbg->d_top, + thermal_dbg, &tt_fops); + + debugfs_create_file("time_in_state_ms", 0400, thermal_dbg->d_top, + thermal_dbg, &dt_fops); + + debugfs_create_file("clear", 0200, thermal_dbg->d_top, + thermal_dbg, &cdev_clear_fops); + + debugfs_create_u32("total_trans", 0400, thermal_dbg->d_top, + &cdev_dbg->total); + + cdev->debugfs = thermal_dbg; +} + +/** + * thermal_debug_cdev_remove - Remove a cooling device debugfs entry + * + * Frees the statistics memory data and remove the debugfs entry + * + * @cdev: a pointer to a cooling device + */ +void thermal_debug_cdev_remove(struct thermal_cooling_device *cdev) +{ + struct thermal_debugfs *thermal_dbg = cdev->debugfs; + + if (!thermal_dbg) + return; + + mutex_lock(&thermal_dbg->lock); + + thermal_debugfs_cdev_clear(&thermal_dbg->cdev_dbg); + cdev->debugfs = NULL; + + mutex_unlock(&thermal_dbg->lock); + + thermal_debugfs_remove_id(thermal_dbg); +} diff --git a/drivers/thermal/thermal_debugfs.h b/drivers/thermal/thermal_debugfs.h new file mode 100644 index 000000000000..341499388448 --- /dev/null +++ b/drivers/thermal/thermal_debugfs.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifdef CONFIG_THERMAL_DEBUGFS +void thermal_debug_init(void); +void thermal_debug_cdev_add(struct thermal_cooling_device *cdev); +void thermal_debug_cdev_remove(struct thermal_cooling_device *cdev); +void thermal_debug_cdev_state_update(const struct thermal_cooling_device *cdev, int state); +#else +static inline void thermal_debug_init(void) {} +static inline void thermal_debug_cdev_add(struct thermal_cooling_device *cdev) {} +static inline void thermal_debug_cdev_remove(struct thermal_cooling_device *cdev) {} +static inline void thermal_debug_cdev_state_update(const struct thermal_cooling_device *cdev, + int state) {} +#endif /* CONFIG_THERMAL_DEBUGFS */ diff --git a/drivers/thermal/thermal_helpers.c b/drivers/thermal/thermal_helpers.c index c3982e0f0075..8f85581693e7 100644 --- a/drivers/thermal/thermal_helpers.c +++ b/drivers/thermal/thermal_helpers.c @@ -146,14 +146,22 @@ unlock: } EXPORT_SYMBOL_GPL(thermal_zone_get_temp); -static void thermal_cdev_set_cur_state(struct thermal_cooling_device *cdev, - int target) +static int thermal_cdev_set_cur_state(struct thermal_cooling_device *cdev, int state) { - if (cdev->ops->set_cur_state(cdev, target)) - return; + int ret; + + /* + * No check is needed for the ops->set_cur_state as the + * registering function checked the ops are correctly set + */ + ret = cdev->ops->set_cur_state(cdev, state); + if (!ret) { + thermal_notify_cdev_state_update(cdev->id, state); + thermal_cooling_device_stats_update(cdev, state); + thermal_debug_cdev_state_update(cdev, state); + } - thermal_notify_cdev_state_update(cdev->id, target); - thermal_cooling_device_stats_update(cdev, target); + return ret; } void __thermal_cdev_update(struct thermal_cooling_device *cdev) diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 9d0427da32af..7defea8fa223 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -32,6 +32,7 @@ struct thermal_zone_device; struct thermal_cooling_device; struct thermal_instance; +struct thermal_debugfs; struct thermal_attr; enum thermal_trend { @@ -113,6 +114,9 @@ struct thermal_cooling_device { struct mutex lock; /* protect thermal_instances list */ struct list_head thermal_instances; struct list_head node; +#ifdef CONFIG_THERMAL_DEBUGFS + struct thermal_debugfs *debugfs; +#endif }; /** @@ -189,6 +193,9 @@ struct thermal_zone_device { struct list_head node; struct delayed_work poll_queue; enum thermal_notify_event notify_event; +#ifdef CONFIG_THERMAL_DEBUGFS + struct thermal_debugfs *debugfs; +#endif bool suspended; }; -- cgit v1.2.3 From 7ef01f228c9f54c6260319858be138a8a7e9e704 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 9 Jan 2024 10:41:12 +0100 Subject: thermal/debugfs: Add thermal debugfs information for mitigation episodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mitigation episodes are recorded. A mitigation episode happens when the first trip point is crossed the way up and then the way down. During this episode other trip points can be crossed also and are accounted for this mitigation episode. The interesting information is the average temperature at the trip point, the undershot and the overshot. The standard deviation of the mitigated temperature will be added later. The thermal debugfs directory structure tries to stay consistent with the sysfs one but in a very simplified way: thermal/ `-- thermal_zones |-- 0 | `-- mitigations `-- 1 `-- mitigations The content of the mitigations file has the following format: ,-Mitigation at 349988258us, duration=130136ms | trip | type | temp(°mC) | hyst(°mC) | duration | avg(°mC) | min(°mC) | max(°mC) | | 0 | passive | 65000 | 2000 | 130136 | 68227 | 62500 | 75625 | | 1 | passive | 75000 | 2000 | 104209 | 74857 | 71666 | 77500 | ,-Mitigation at 272451637us, duration=75000ms | trip | type | temp(°mC) | hyst(°mC) | duration | avg(°mC) | min(°mC) | max(°mC) | | 0 | passive | 65000 | 2000 | 75000 | 68561 | 62500 | 75000 | | 1 | passive | 75000 | 2000 | 60714 | 74820 | 70555 | 77500 | ,-Mitigation at 238184119us, duration=27316ms | trip | type | temp(°mC) | hyst(°mC) | duration | avg(°mC) | min(°mC) | max(°mC) | | 0 | passive | 65000 | 2000 | 27316 | 73377 | 62500 | 75000 | | 1 | passive | 75000 | 2000 | 19468 | 75284 | 69444 | 77500 | ,-Mitigation at 39863713us, duration=136196ms | trip | type | temp(°mC) | hyst(°mC) | duration | avg(°mC) | min(°mC) | max(°mC) | | 0 | passive | 65000 | 2000 | 136196 | 73922 | 62500 | 75000 | | 1 | passive | 75000 | 2000 | 91721 | 74386 | 69444 | 78125 | More information for a better understanding of the thermal behavior will be added after. The idea is to give detailed statistics information about the undershots and overshots, the temperature speed, etc... As all the information in a single file is too much, the idea would be to create a directory named with the mitigation timestamp where all data could be added. Please note this code is immune against trip ordering but not against a trip temperature change while a mitigation is happening. However, this situation should be extremely rare, perhaps not happening and we might question ourselves if something should be done in the core framework for other components first. Signed-off-by: Daniel Lezcano [ rjw: White space fixups, rebase ] Signed-off-by: Rafael J. Wysocki --- drivers/thermal/thermal_core.c | 7 + drivers/thermal/thermal_debugfs.c | 400 +++++++++++++++++++++++++++++++++++++- drivers/thermal/thermal_debugfs.h | 14 ++ 3 files changed, 417 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 5d9cc422d7ba..abf4c2390b19 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -382,6 +382,7 @@ static void handle_thermal_trip(struct thermal_zone_device *tz, */ if (tz->temperature >= trip->temperature) { thermal_notify_tz_trip_up(tz, trip); + thermal_debug_tz_trip_up(tz, trip); trip->threshold = trip->temperature - trip->hysteresis; } else { trip->threshold = trip->temperature; @@ -399,6 +400,7 @@ static void handle_thermal_trip(struct thermal_zone_device *tz, */ if (tz->temperature < trip->temperature - trip->hysteresis) { thermal_notify_tz_trip_down(tz, trip); + thermal_debug_tz_trip_down(tz, trip); trip->threshold = trip->temperature; } else { trip->threshold = trip->temperature - trip->hysteresis; @@ -430,6 +432,7 @@ static void update_temperature(struct thermal_zone_device *tz) trace_thermal_temperature(tz); thermal_genl_sampling_temp(tz->id, temp); + thermal_debug_update_temp(tz); } static void thermal_zone_device_check(struct work_struct *work) @@ -1413,6 +1416,8 @@ thermal_zone_device_register_with_trips(const char *type, struct thermal_trip *t thermal_notify_tz_create(tz); + thermal_debug_tz_add(tz); + return tz; unregister: @@ -1476,6 +1481,8 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz) if (!tz) return; + thermal_debug_tz_remove(tz); + mutex_lock(&thermal_list_lock); list_for_each_entry(pos, &thermal_tz_list, node) if (pos == tz) diff --git a/drivers/thermal/thermal_debugfs.c b/drivers/thermal/thermal_debugfs.c index b18bdde47847..b53881e9e0b6 100644 --- a/drivers/thermal/thermal_debugfs.c +++ b/drivers/thermal/thermal_debugfs.c @@ -1,4 +1,3 @@ - // SPDX-License-Identifier: GPL-2.0 /* * Copyright 2023 Linaro Limited @@ -14,8 +13,11 @@ #include #include +#include "thermal_core.h" + static struct dentry *d_root; static struct dentry *d_cdev; +static struct dentry *d_tz; /* * Length of the string containing the thermal zone id or the cooling @@ -60,7 +62,7 @@ struct cdev_debugfs { }; /** - * struct cdev_value - Common structure for cooling device entry + * struct cdev_record - Common structure for cooling device entry * * The following common structure allows to store the information * related to the transitions and to the state residencies. They are @@ -81,22 +83,89 @@ struct cdev_record { }; }; +/** + * struct trip_stats - Thermal trip statistics + * + * The trip_stats structure has the relevant information to show the + * statistics related to temperature going above a trip point. + * + * @timestamp: the trip crossing timestamp + * @duration: total time when the zone temperature was above the trip point + * @count: the number of times the zone temperature was above the trip point + * @max: maximum recorded temperature above the trip point + * @min: minimum recorded temperature above the trip point + * @avg: average temperature above the trip point + */ +struct trip_stats { + ktime_t timestamp; + ktime_t duration; + int count; + int max; + int min; + int avg; +}; + +/** + * struct tz_episode - A mitigation episode information + * + * The tz_episode structure describes a mitigation episode. A + * mitigation episode begins the trip point with the lower temperature + * is crossed the way up and ends when it is crossed the way + * down. During this episode we can have multiple trip points crossed + * the way up and down if there are multiple trip described in the + * firmware after the lowest temperature trip point. + * + * @timestamp: first trip point crossed the way up + * @duration: total duration of the mitigation episode + * @node: a list element to be added to the list of tz events + * @trip_stats: per trip point statistics, flexible array + */ +struct tz_episode { + ktime_t timestamp; + ktime_t duration; + struct list_head node; + struct trip_stats trip_stats[]; +}; + +/** + * struct tz_debugfs - Store all mitigation episodes for a thermal zone + * + * The tz_debugfs structure contains the list of the mitigation + * episodes and has to track which trip point has been crossed in + * order to handle correctly nested trip point mitigation episodes. + * + * We keep the history of the trip point crossed in an array and as we + * can go back and forth inside this history, eg. trip 0,1,2,1,2,1,0, + * we keep track of the current position in the history array. + * + * @tz_episodes: a list of thermal mitigation episodes + * @trips_crossed: an array of trip points crossed by id + * @nr_trips: the number of trip points currently being crossed + */ +struct tz_debugfs { + struct list_head tz_episodes; + int *trips_crossed; + int nr_trips; +}; + /** * struct thermal_debugfs - High level structure for a thermal object in debugfs * * The thermal_debugfs structure is the common structure used by the - * cooling device to compute the statistics. + * cooling device or the thermal zone to store the statistics. * * @d_top: top directory of the thermal object directory * @lock: per object lock to protect the internals * - * @cdev: a cooling device debug structure + * @cdev_dbg: a cooling device debug structure + * @tz_dbg: a thermal zone debug structure */ struct thermal_debugfs { struct dentry *d_top; struct mutex lock; union { struct cdev_debugfs cdev_dbg; + struct tz_debugfs tz_dbg; }; }; @@ -107,6 +176,10 @@ void thermal_debug_init(void) return; d_cdev = debugfs_create_dir("cooling_devices", d_root); + if (!d_cdev) + return; + + d_tz = debugfs_create_dir("thermal_zones", d_root); } static struct thermal_debugfs *thermal_debugfs_add_id(struct dentry *d, int id) @@ -444,3 +517,322 @@ void thermal_debug_cdev_remove(struct thermal_cooling_device *cdev) thermal_debugfs_remove_id(thermal_dbg); } + +static struct tz_episode *thermal_debugfs_tz_event_alloc(struct thermal_zone_device *tz, + ktime_t now) +{ + struct tz_episode *tze; + int i; + + tze = kzalloc(struct_size(tze, trip_stats, tz->num_trips), GFP_KERNEL); + if (!tze) + return NULL; + + INIT_LIST_HEAD(&tze->node); + tze->timestamp = now; + + for (i = 0; i < tz->num_trips; i++) { + tze->trip_stats[i].min = INT_MAX; + tze->trip_stats[i].max = INT_MIN; + } + + return tze; +} + +void thermal_debug_tz_trip_up(struct thermal_zone_device *tz, + const struct thermal_trip *trip) +{ + struct tz_episode *tze; + struct tz_debugfs *tz_dbg; + struct thermal_debugfs *thermal_dbg = tz->debugfs; + int temperature = tz->temperature; + int trip_id = thermal_zone_trip_id(tz, trip); + ktime_t now = ktime_get(); + + if (!thermal_dbg) + return; + + mutex_lock(&thermal_dbg->lock); + + tz_dbg = &thermal_dbg->tz_dbg; + + /* + * The mitigation is starting. A mitigation can contain + * several episodes where each of them is related to a + * temperature crossing a trip point. The episodes are + * nested. That means when the temperature is crossing the + * first trip point, the duration begins to be measured. If + * the temperature continues to increase and reaches the + * second trip point, the duration of the first trip must be + * also accumulated. + * + * eg. + * + * temp + * ^ + * | -------- + * trip 2 / \ ------ + * | /| |\ /| |\ + * trip 1 / | | `---- | | \ + * | /| | | | | |\ + * trip 0 / | | | | | | \ + * | /| | | | | | | |\ + * | / | | | | | | | | `-- + * | / | | | | | | | | + * |----- | | | | | | | | + * | | | | | | | | | + * --------|-|-|--------|--------|------|-|-|------------------> time + * | | |<--t2-->| |<-t2'>| | | + * | | | | + * | |<------------t1------------>| | + * | | + * |<-------------t0--------------->| + * + */ + if (!tz_dbg->nr_trips) { + tze = thermal_debugfs_tz_event_alloc(tz, now); + if (!tze) + return; + + list_add(&tze->node, &tz_dbg->tz_episodes); + } + + /* + * Each time a trip point is crossed the way up, the trip_id + * is stored in the trip_crossed array and the nr_trips is + * incremented. A nr_trips equal to zero means we are entering + * a mitigation episode. + * + * The trip ids may not be in the ascending order but the + * result in the array trips_crossed will be in the ascending + * temperature order. The function detecting when a trip point + * is crossed the way down will handle the very rare case when + * the trip points may have been reordered during this + * mitigation episode. + */ + tz_dbg->trips_crossed[tz_dbg->nr_trips++] = trip_id; + + tze = list_first_entry(&tz_dbg->tz_episodes, struct tz_episode, node); + tze->trip_stats[trip_id].timestamp = now; + tze->trip_stats[trip_id].max = max(tze->trip_stats[trip_id].max, temperature); + tze->trip_stats[trip_id].min = min(tze->trip_stats[trip_id].min, temperature); + tze->trip_stats[trip_id].avg = tze->trip_stats[trip_id].avg + + (temperature - tze->trip_stats[trip_id].avg) / + tze->trip_stats[trip_id].count; + + mutex_unlock(&thermal_dbg->lock); +} + +void thermal_debug_tz_trip_down(struct thermal_zone_device *tz, + const struct thermal_trip *trip) +{ + struct thermal_debugfs *thermal_dbg = tz->debugfs; + struct tz_episode *tze; + struct tz_debugfs *tz_dbg; + ktime_t delta, now = ktime_get(); + int trip_id = thermal_zone_trip_id(tz, trip); + int i; + + if (!thermal_dbg) + return; + + mutex_lock(&thermal_dbg->lock); + + tz_dbg = &thermal_dbg->tz_dbg; + + /* + * The temperature crosses the way down but there was not + * mitigation detected before. That may happen when the + * temperature is greater than a trip point when registering a + * thermal zone, which is a common use case as the kernel has + * no mitigation mechanism yet at boot time. + */ + if (!tz_dbg->nr_trips) + goto out; + + for (i = tz_dbg->nr_trips - 1; i >= 0; i--) { + if (tz_dbg->trips_crossed[i] == trip_id) + break; + } + + if (i < 0) + goto out; + + tz_dbg->nr_trips--; + + if (i < tz_dbg->nr_trips) + tz_dbg->trips_crossed[i] = tz_dbg->trips_crossed[tz_dbg->nr_trips]; + + tze = list_first_entry(&tz_dbg->tz_episodes, struct tz_episode, node); + + delta = ktime_sub(now, tze->trip_stats[trip_id].timestamp); + + tze->trip_stats[trip_id].duration = + ktime_add(delta, tze->trip_stats[trip_id].duration); + + /* + * This event closes the mitigation as we are crossing the + * last trip point the way down. + */ + if (!tz_dbg->nr_trips) + tze->duration = ktime_sub(now, tze->timestamp); + +out: + mutex_unlock(&thermal_dbg->lock); +} + +void thermal_debug_update_temp(struct thermal_zone_device *tz) +{ + struct thermal_debugfs *thermal_dbg = tz->debugfs; + struct tz_episode *tze; + struct tz_debugfs *tz_dbg; + int trip_id, i; + + if (!thermal_dbg) + return; + + mutex_lock(&thermal_dbg->lock); + + tz_dbg = &thermal_dbg->tz_dbg; + + if (!tz_dbg->nr_trips) + goto out; + + for (i = 0; i < tz_dbg->nr_trips; i++) { + trip_id = tz_dbg->trips_crossed[i]; + tze = list_first_entry(&tz_dbg->tz_episodes, struct tz_episode, node); + tze->trip_stats[trip_id].count++; + tze->trip_stats[trip_id].max = max(tze->trip_stats[trip_id].max, tz->temperature); + tze->trip_stats[trip_id].min = min(tze->trip_stats[trip_id].min, tz->temperature); + tze->trip_stats[trip_id].avg = tze->trip_stats[trip_id].avg + + (tz->temperature - tze->trip_stats[trip_id].avg) / + tze->trip_stats[trip_id].count; + } +out: + mutex_unlock(&thermal_dbg->lock); +} + +static void *tze_seq_start(struct seq_file *s, loff_t *pos) +{ + struct thermal_zone_device *tz = s->private; + struct thermal_debugfs *thermal_dbg = tz->debugfs; + struct tz_debugfs *tz_dbg = &thermal_dbg->tz_dbg; + + mutex_lock(&thermal_dbg->lock); + + return seq_list_start(&tz_dbg->tz_episodes, *pos); +} + +static void *tze_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct thermal_zone_device *tz = s->private; + struct thermal_debugfs *thermal_dbg = tz->debugfs; + struct tz_debugfs *tz_dbg = &thermal_dbg->tz_dbg; + + return seq_list_next(v, &tz_dbg->tz_episodes, pos); +} + +static void tze_seq_stop(struct seq_file *s, void *v) +{ + struct thermal_zone_device *tz = s->private; + struct thermal_debugfs *thermal_dbg = tz->debugfs; + + mutex_unlock(&thermal_dbg->lock); +} + +static int tze_seq_show(struct seq_file *s, void *v) +{ + struct thermal_zone_device *tz = s->private; + struct thermal_trip *trip; + struct tz_episode *tze; + const char *type; + int trip_id; + + tze = list_entry((struct list_head *)v, struct tz_episode, node); + + seq_printf(s, ",-Mitigation at %lluus, duration=%llums\n", + ktime_to_us(tze->timestamp), + ktime_to_ms(tze->duration)); + + seq_printf(s, "| trip | type | temp(°mC) | hyst(°mC) | duration | avg(°mC) | min(°mC) | max(°mC) |\n"); + + for_each_trip(tz, trip) { + /* + * There is no possible mitigation happening at the + * critical trip point, so the stats will be always + * zero, skip this trip point + */ + if (trip->type == THERMAL_TRIP_CRITICAL) + continue; + + if (trip->type == THERMAL_TRIP_PASSIVE) + type = "passive"; + else if (trip->type == THERMAL_TRIP_ACTIVE) + type = "active"; + else + type = "hot"; + + trip_id = thermal_zone_trip_id(tz, trip); + + seq_printf(s, "| %*d | %*s | %*d | %*d | %*lld | %*d | %*d | %*d |\n", + 4 , trip_id, + 8, type, + 9, trip->temperature, + 9, trip->hysteresis, + 10, ktime_to_ms(tze->trip_stats[trip_id].duration), + 9, tze->trip_stats[trip_id].avg, + 9, tze->trip_stats[trip_id].min, + 9, tze->trip_stats[trip_id].max); + } + + return 0; +} + +static const struct seq_operations tze_sops = { + .start = tze_seq_start, + .next = tze_seq_next, + .stop = tze_seq_stop, + .show = tze_seq_show, +}; + +DEFINE_SEQ_ATTRIBUTE(tze); + +void thermal_debug_tz_add(struct thermal_zone_device *tz) +{ + struct thermal_debugfs *thermal_dbg; + struct tz_debugfs *tz_dbg; + + thermal_dbg = thermal_debugfs_add_id(d_tz, tz->id); + if (!thermal_dbg) + return; + + tz_dbg = &thermal_dbg->tz_dbg; + + tz_dbg->trips_crossed = kzalloc(sizeof(int) * tz->num_trips, GFP_KERNEL); + if (!tz_dbg->trips_crossed) { + thermal_debugfs_remove_id(thermal_dbg); + return; + } + + INIT_LIST_HEAD(&tz_dbg->tz_episodes); + + debugfs_create_file("mitigations", 0400, thermal_dbg->d_top, tz, &tze_fops); + + tz->debugfs = thermal_dbg; +} + +void thermal_debug_tz_remove(struct thermal_zone_device *tz) +{ + struct thermal_debugfs *thermal_dbg = tz->debugfs; + + if (!thermal_dbg) + return; + + mutex_lock(&thermal_dbg->lock); + + tz->debugfs = NULL; + + mutex_unlock(&thermal_dbg->lock); + + thermal_debugfs_remove_id(thermal_dbg); +} diff --git a/drivers/thermal/thermal_debugfs.h b/drivers/thermal/thermal_debugfs.h index 341499388448..155b9af5fe87 100644 --- a/drivers/thermal/thermal_debugfs.h +++ b/drivers/thermal/thermal_debugfs.h @@ -5,10 +5,24 @@ void thermal_debug_init(void); void thermal_debug_cdev_add(struct thermal_cooling_device *cdev); void thermal_debug_cdev_remove(struct thermal_cooling_device *cdev); void thermal_debug_cdev_state_update(const struct thermal_cooling_device *cdev, int state); +void thermal_debug_tz_add(struct thermal_zone_device *tz); +void thermal_debug_tz_remove(struct thermal_zone_device *tz); +void thermal_debug_tz_trip_up(struct thermal_zone_device *tz, + const struct thermal_trip *trip); +void thermal_debug_tz_trip_down(struct thermal_zone_device *tz, + const struct thermal_trip *trip); +void thermal_debug_update_temp(struct thermal_zone_device *tz); #else static inline void thermal_debug_init(void) {} static inline void thermal_debug_cdev_add(struct thermal_cooling_device *cdev) {} static inline void thermal_debug_cdev_remove(struct thermal_cooling_device *cdev) {} static inline void thermal_debug_cdev_state_update(const struct thermal_cooling_device *cdev, int state) {} +static inline void thermal_debug_tz_add(struct thermal_zone_device *tz) {} +static inline void thermal_debug_tz_remove(struct thermal_zone_device *tz) {} +static inline void thermal_debug_tz_trip_up(struct thermal_zone_device *tz, + const struct thermal_trip *trip) {}; +static inline void thermal_debug_tz_trip_down(struct thermal_zone_device *tz, + const struct thermal_trip *trip) {} +static inline void thermal_debug_update_temp(struct thermal_zone_device *tz) {} #endif /* CONFIG_THERMAL_DEBUGFS */ -- cgit v1.2.3 From 57a427c81c322c5f0cdfe7c46cdee553d18b1ec6 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 5 Jan 2024 14:45:11 +0100 Subject: thermal: core: Use kstrdup_const() during cooling device registration Some *thermal_cooling_device_register() calls pass a string literal as the 'type' parameter, so kstrdup_const() can be used instead of kstrdup() to avoid a memory allocation in such cases. Signed-off-by: Christophe JAILLET [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/thermal/thermal_core.c | 6 +++--- include/linux/thermal.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index abf4c2390b19..dfaa6341694a 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -845,7 +845,7 @@ static void thermal_release(struct device *dev) sizeof("cooling_device") - 1)) { cdev = to_cooling_device(dev); thermal_cooling_device_destroy_sysfs(cdev); - kfree(cdev->type); + kfree_const(cdev->type); ida_free(&thermal_cdev_ida, cdev->id); kfree(cdev); } @@ -917,7 +917,7 @@ __thermal_cooling_device_register(struct device_node *np, cdev->id = ret; id = ret; - cdev->type = kstrdup(type ? type : "", GFP_KERNEL); + cdev->type = kstrdup_const(type ? type : "", GFP_KERNEL); if (!cdev->type) { ret = -ENOMEM; goto out_ida_remove; @@ -970,7 +970,7 @@ __thermal_cooling_device_register(struct device_node *np, out_cooling_dev: thermal_cooling_device_destroy_sysfs(cdev); out_cdev_type: - kfree(cdev->type); + kfree_const(cdev->type); out_ida_remove: ida_free(&thermal_cdev_ida, id); out_kfree_cdev: diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 7defea8fa223..3227335fb447 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -103,7 +103,7 @@ struct thermal_cooling_device_ops { struct thermal_cooling_device { int id; - char *type; + const char *type; unsigned long max_state; struct device device; struct device_node *np; -- cgit v1.2.3 From 11fde939314836c4375b567d60407d752d987069 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 9 Jan 2024 17:42:04 +0100 Subject: thermal: netlink: Rework notify API for cooling devices In analogy with some previous thermal netlink API changes, redefine thermal_notify_cdev_state_update(), thermal_notify_cdev_add() and thermal_notify_cdev_delete() to take a const cdev pointer as their first argument and let them extract the requisite information from there by themselves. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Daniel Lezcano --- drivers/thermal/thermal_helpers.c | 2 +- drivers/thermal/thermal_netlink.c | 15 ++++++++------- drivers/thermal/thermal_netlink.h | 16 +++++++++------- 3 files changed, 18 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/thermal_helpers.c b/drivers/thermal/thermal_helpers.c index 8f85581693e7..dd72eecfc7ca 100644 --- a/drivers/thermal/thermal_helpers.c +++ b/drivers/thermal/thermal_helpers.c @@ -156,7 +156,7 @@ static int thermal_cdev_set_cur_state(struct thermal_cooling_device *cdev, int s */ ret = cdev->ops->set_cur_state(cdev, state); if (!ret) { - thermal_notify_cdev_state_update(cdev->id, state); + thermal_notify_cdev_state_update(cdev, state); thermal_cooling_device_stats_update(cdev, state); thermal_debug_cdev_state_update(cdev, state); } diff --git a/drivers/thermal/thermal_netlink.c b/drivers/thermal/thermal_netlink.c index 46b0156a6319..76a231a29654 100644 --- a/drivers/thermal/thermal_netlink.c +++ b/drivers/thermal/thermal_netlink.c @@ -364,24 +364,25 @@ int thermal_notify_tz_trip_change(const struct thermal_zone_device *tz, return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_TRIP_CHANGE, &p); } -int thermal_notify_cdev_state_update(int cdev_id, int cdev_state) +int thermal_notify_cdev_state_update(const struct thermal_cooling_device *cdev, + int state) { - struct param p = { .cdev_id = cdev_id, .cdev_state = cdev_state }; + struct param p = { .cdev_id = cdev->id, .cdev_state = state }; return thermal_genl_send_event(THERMAL_GENL_EVENT_CDEV_STATE_UPDATE, &p); } -int thermal_notify_cdev_add(int cdev_id, const char *name, int cdev_max_state) +int thermal_notify_cdev_add(const struct thermal_cooling_device *cdev) { - struct param p = { .cdev_id = cdev_id, .name = name, - .cdev_max_state = cdev_max_state }; + struct param p = { .cdev_id = cdev->id, .name = cdev->type, + .cdev_max_state = cdev->max_state }; return thermal_genl_send_event(THERMAL_GENL_EVENT_CDEV_ADD, &p); } -int thermal_notify_cdev_delete(int cdev_id) +int thermal_notify_cdev_delete(const struct thermal_cooling_device *cdev) { - struct param p = { .cdev_id = cdev_id }; + struct param p = { .cdev_id = cdev->id }; return thermal_genl_send_event(THERMAL_GENL_EVENT_CDEV_DELETE, &p); } diff --git a/drivers/thermal/thermal_netlink.h b/drivers/thermal/thermal_netlink.h index 13ea2ad23d3a..93a927e144d5 100644 --- a/drivers/thermal/thermal_netlink.h +++ b/drivers/thermal/thermal_netlink.h @@ -12,6 +12,7 @@ struct thermal_genl_cpu_caps { struct thermal_zone_device; struct thermal_trip; +struct thermal_cooling_device; /* Netlink notification function */ #ifdef CONFIG_THERMAL_NETLINK @@ -27,9 +28,10 @@ int thermal_notify_tz_trip_up(const struct thermal_zone_device *tz, const struct thermal_trip *trip); int thermal_notify_tz_trip_change(const struct thermal_zone_device *tz, const struct thermal_trip *trip); -int thermal_notify_cdev_state_update(int cdev_id, int state); -int thermal_notify_cdev_add(int cdev_id, const char *name, int max_state); -int thermal_notify_cdev_delete(int cdev_id); +int thermal_notify_cdev_state_update(const struct thermal_cooling_device *cdev, + int state); +int thermal_notify_cdev_add(const struct thermal_cooling_device *cdev); +int thermal_notify_cdev_delete(const struct thermal_cooling_device *cdev); int thermal_notify_tz_gov_change(const struct thermal_zone_device *tz, const char *name); int thermal_genl_sampling_temp(int id, int temp); @@ -79,18 +81,18 @@ static inline int thermal_notify_tz_trip_change(const struct thermal_zone_device return 0; } -static inline int thermal_notify_cdev_state_update(int cdev_id, int state) +static inline int thermal_notify_cdev_state_update(const struct thermal_cooling_device *cdev, + int state) { return 0; } -static inline int thermal_notify_cdev_add(int cdev_id, const char *name, - int max_state) +static inline int thermal_notify_cdev_add(const struct thermal_cooling_device *cdev) { return 0; } -static inline int thermal_notify_cdev_delete(int cdev_id) +static inline int thermal_notify_cdev_delete(const struct thermal_cooling_device *cdev) { return 0; } -- cgit v1.2.3 From fd881eac3af6c4e36b34ce92d69fb1d7e95f5920 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 9 Jan 2024 17:42:48 +0100 Subject: thermal: helpers: Rearrange thermal_cdev_set_cur_state() Change the code layout in thermal_cdev_set_cur_state() so it returns early on errors which is more consistent with what happens elsewhere. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Daniel Lezcano --- drivers/thermal/thermal_helpers.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/thermal/thermal_helpers.c b/drivers/thermal/thermal_helpers.c index dd72eecfc7ca..0329f4a71b02 100644 --- a/drivers/thermal/thermal_helpers.c +++ b/drivers/thermal/thermal_helpers.c @@ -155,13 +155,14 @@ static int thermal_cdev_set_cur_state(struct thermal_cooling_device *cdev, int s * registering function checked the ops are correctly set */ ret = cdev->ops->set_cur_state(cdev, state); - if (!ret) { - thermal_notify_cdev_state_update(cdev, state); - thermal_cooling_device_stats_update(cdev, state); - thermal_debug_cdev_state_update(cdev, state); - } + if (ret) + return ret; - return ret; + thermal_notify_cdev_state_update(cdev, state); + thermal_cooling_device_stats_update(cdev, state); + thermal_debug_cdev_state_update(cdev, state); + + return 0; } void __thermal_cdev_update(struct thermal_cooling_device *cdev) -- cgit v1.2.3 From e95fa7404716f6e25021e66067271a4ad8eb1486 Mon Sep 17 00:00:00 2001 From: Di Shen Date: Wed, 10 Jan 2024 19:55:26 +0800 Subject: thermal: gov_power_allocator: avoid inability to reset a cdev Commit 0952177f2a1f ("thermal/core/power_allocator: Update once cooling devices when temp is low") adds an update flag to avoid triggering a thermal event when there is no need, and the thermal cdev is updated once when the temperature is low. But when the trips are writable, and switch_on_temp is set to be a higher value, the cooling device state may not be reset to 0, because last_temperature is smaller than switch_on_temp. For example: First: switch_on_temp=70 control_temp=85; Then userspace change the trip_temp: switch_on_temp=45 control_temp=55 cur_temp=54 Then userspace reset the trip_temp: switch_on_temp=70 control_temp=85 cur_temp=57 last_temp=54 At this time, the cooling device state should be reset to 0. However, because cur_temp(57) < switch_on_temp(70) last_temp(54) < switch_on_temp(70) ----> update = false, update is false, the cooling device state can not be reset. Using the observation that tz->passive can also be regarded as the temperature status, set the update flag to the tz->passive value. When the temperature drops below switch_on for the first time, the states of cooling devices can be reset once, and tz->passive is updated to 0. In the next round, because tz->passive is 0, cdev->state will not be updated. By using the tz->passive value as the "update" flag, the issue above can be solved, and the cooling devices can be updated only once when the temperature is low. Fixes: 0952177f2a1f ("thermal/core/power_allocator: Update once cooling devices when temp is low") Cc: 5.13+ # 5.13+ Suggested-by: Wei Wang Signed-off-by: Di Shen Reviewed-by: Lukasz Luba [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/thermal/gov_power_allocator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c index 7b6aa265ff6a..81e061f183ad 100644 --- a/drivers/thermal/gov_power_allocator.c +++ b/drivers/thermal/gov_power_allocator.c @@ -762,7 +762,7 @@ static int power_allocator_throttle(struct thermal_zone_device *tz, trip = params->trip_switch_on; if (trip && tz->temperature < trip->temperature) { - update = tz->last_temperature >= trip->temperature; + update = tz->passive; tz->passive = 0; reset_pid_controller(params); allow_maximum_power(tz, update); -- cgit v1.2.3 From 97566d09fd02d2ab329774bb89a2cdf2267e86d9 Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Tue, 9 Jan 2024 19:07:04 -0800 Subject: thermal: intel: hfi: Add syscore callbacks for system-wide PM The kernel allocates a memory buffer and provides its location to the hardware, which uses it to update the HFI table. This allocation occurs during boot and remains constant throughout runtime. When resuming from hibernation, the restore kernel allocates a second memory buffer and reprograms the HFI hardware with the new location as part of a normal boot. The location of the second memory buffer may differ from the one allocated by the image kernel. When the restore kernel transfers control to the image kernel, its HFI buffer becomes invalid, potentially leading to memory corruption if the hardware writes to it (the hardware continues to use the buffer from the restore kernel). It is also possible that the hardware "forgets" the address of the memory buffer when resuming from "deep" suspend. Memory corruption may also occur in such a scenario. To prevent the described memory corruption, disable HFI when preparing to suspend or hibernate. Enable it when resuming. Add syscore callbacks to handle the package of the boot CPU (packages of non-boot CPUs are handled via CPU offline). Syscore ops always run on the boot CPU. Additionally, HFI only needs to be disabled during "deep" suspend and hibernation. Syscore ops only run in these cases. Cc: 6.1+ # 6.1+ Signed-off-by: Ricardo Neri [ rjw: Comment adjustment, subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/intel_hfi.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'drivers') diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c index 22445403b520..3b04c6ec4fca 100644 --- a/drivers/thermal/intel/intel_hfi.c +++ b/drivers/thermal/intel/intel_hfi.c @@ -35,7 +35,9 @@ #include #include #include +#include #include +#include #include #include @@ -571,6 +573,30 @@ static __init int hfi_parse_features(void) return 0; } +static void hfi_do_enable(void) +{ + /* This code runs only on the boot CPU. */ + struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0); + struct hfi_instance *hfi_instance = info->hfi_instance; + + /* No locking needed. There is no concurrency with CPU online. */ + hfi_set_hw_table(hfi_instance); + hfi_enable(); +} + +static int hfi_do_disable(void) +{ + /* No locking needed. There is no concurrency with CPU offline. */ + hfi_disable(); + + return 0; +} + +static struct syscore_ops hfi_pm_ops = { + .resume = hfi_do_enable, + .suspend = hfi_do_disable, +}; + void __init intel_hfi_init(void) { struct hfi_instance *hfi_instance; @@ -602,6 +628,8 @@ void __init intel_hfi_init(void) if (!hfi_updates_wq) goto err_nomem; + register_syscore_ops(&hfi_pm_ops); + return; err_nomem: -- cgit v1.2.3 From 6dcb35088e264306b948141971286257681ca412 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 12 Jan 2024 17:30:18 +0300 Subject: thermal/debugfs: Unlock on error path in thermal_debug_tz_trip_up() Add a missing mutex_unlock(&thermal_dbg->lock) to this error path. Fixes: 7ef01f228c9f ("thermal/debugfs: Add thermal debugfs information for mitigation episodes") Signed-off-by: Dan Carpenter Signed-off-by: Rafael J. Wysocki --- drivers/thermal/thermal_debugfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/thermal/thermal_debugfs.c b/drivers/thermal/thermal_debugfs.c index b53881e9e0b6..c617e8b9f0dd 100644 --- a/drivers/thermal/thermal_debugfs.c +++ b/drivers/thermal/thermal_debugfs.c @@ -592,7 +592,7 @@ void thermal_debug_tz_trip_up(struct thermal_zone_device *tz, if (!tz_dbg->nr_trips) { tze = thermal_debugfs_tz_event_alloc(tz, now); if (!tze) - return; + goto unlock; list_add(&tze->node, &tz_dbg->tz_episodes); } @@ -620,6 +620,7 @@ void thermal_debug_tz_trip_up(struct thermal_zone_device *tz, (temperature - tze->trip_stats[trip_id].avg) / tze->trip_stats[trip_id].count; +unlock: mutex_unlock(&thermal_dbg->lock); } -- cgit v1.2.3