diff options
-rw-r--r-- | drivers/thermal/thermal_core.c | 58 | ||||
-rw-r--r-- | drivers/thermal/thermal_core.h | 10 |
2 files changed, 61 insertions, 7 deletions
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 48bf267d090d..95c399f94744 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -288,6 +288,28 @@ static int __thermal_zone_device_set_mode(struct thermal_zone_device *tz, return 0; } +static void thermal_zone_broken_disable(struct thermal_zone_device *tz) +{ + struct thermal_trip_desc *td; + + dev_err(&tz->device, "Unable to get temperature, disabling!\n"); + /* + * This function only runs for enabled thermal zones, so no need to + * check for the current mode. + */ + __thermal_zone_device_set_mode(tz, THERMAL_DEVICE_DISABLED); + thermal_notify_tz_disable(tz); + + for_each_trip_desc(tz, td) { + if (td->trip.type == THERMAL_TRIP_CRITICAL && + td->trip.temperature > THERMAL_TEMP_INVALID) { + dev_crit(&tz->device, + "Disabled thermal zone with critical trip point\n"); + return; + } + } +} + /* * Zone update section: main control loop applied to each zone while monitoring * in polling mode. The monitoring is done using a workqueue. @@ -308,6 +330,34 @@ static void thermal_zone_device_set_polling(struct thermal_zone_device *tz, cancel_delayed_work(&tz->poll_queue); } +static void thermal_zone_recheck(struct thermal_zone_device *tz, int error) +{ + if (error == -EAGAIN) { + thermal_zone_device_set_polling(tz, THERMAL_RECHECK_DELAY); + return; + } + + /* + * Print the message once to reduce log noise. It will be followed by + * another one if the temperature cannot be determined after multiple + * attempts. + */ + if (tz->recheck_delay_jiffies == THERMAL_RECHECK_DELAY) + dev_info(&tz->device, "Temperature check failed (%d)\n", error); + + thermal_zone_device_set_polling(tz, tz->recheck_delay_jiffies); + + tz->recheck_delay_jiffies += max(tz->recheck_delay_jiffies >> 1, 1ULL); + if (tz->recheck_delay_jiffies > THERMAL_MAX_RECHECK_DELAY) { + thermal_zone_broken_disable(tz); + /* + * Restore the original recheck delay value to allow the thermal + * zone to try to recover when it is reenabled by user space. + */ + tz->recheck_delay_jiffies = THERMAL_RECHECK_DELAY; + } +} + static void monitor_thermal_zone(struct thermal_zone_device *tz) { if (tz->mode != THERMAL_DEVICE_ENABLED) @@ -507,10 +557,7 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz, ret = __thermal_zone_get_temp(tz, &temp); if (ret) { - if (ret != -EAGAIN) - dev_info(&tz->device, "Temperature check failed (%d)\n", ret); - - thermal_zone_device_set_polling(tz, msecs_to_jiffies(THERMAL_RECHECK_DELAY_MS)); + thermal_zone_recheck(tz, ret); return; } else if (temp <= THERMAL_TEMP_INVALID) { /* @@ -522,6 +569,8 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz, goto monitor; } + tz->recheck_delay_jiffies = THERMAL_RECHECK_DELAY; + tz->last_temperature = tz->temperature; tz->temperature = temp; @@ -1462,6 +1511,7 @@ thermal_zone_device_register_with_trips(const char *type, thermal_set_delay_jiffies(&tz->passive_delay_jiffies, passive_delay); thermal_set_delay_jiffies(&tz->polling_delay_jiffies, polling_delay); + tz->recheck_delay_jiffies = THERMAL_RECHECK_DELAY; /* sys I/F */ /* Add nodes that are always present via .groups */ diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index ba8e6fc807ca..4cf2b7230d04 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -67,6 +67,8 @@ struct thermal_governor { * @polling_delay_jiffies: number of jiffies to wait between polls when * checking whether trip points have been crossed (0 for * interrupt driven systems) + * @recheck_delay_jiffies: delay after a failed attempt to determine the zone + * temperature before trying again * @temperature: current temperature. This is only for core code, * drivers should use thermal_zone_get_temp() to get the * current temperature @@ -108,6 +110,7 @@ struct thermal_zone_device { int num_trips; unsigned long passive_delay_jiffies; unsigned long polling_delay_jiffies; + unsigned long recheck_delay_jiffies; int temperature; int last_temperature; int emul_temperature; @@ -137,10 +140,11 @@ struct thermal_zone_device { #define THERMAL_TEMP_INIT INT_MIN /* - * Default delay after a failing thermal zone temperature check before - * attempting to check it again. + * Default and maximum delay after a failed thermal zone temperature check + * before attempting to check it again (in jiffies). */ -#define THERMAL_RECHECK_DELAY_MS 250 +#define THERMAL_RECHECK_DELAY msecs_to_jiffies(250) +#define THERMAL_MAX_RECHECK_DELAY (120 * HZ) /* Default Thermal Governor */ #if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE) |