diff options
author | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2023-02-15 17:18:08 +0100 |
---|---|---|
committer | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2023-02-15 17:18:08 +0100 |
commit | badf1f90502d3fc627a18880dfafd8c636699baf (patch) | |
tree | d069f30768766fa721b6f311351de2e0df53f8c1 /drivers/thermal | |
parent | c3bd6d539f34784b235c7070edba978f67516372 (diff) | |
parent | fef1f0be10c61dd16fd370964c316c399483448f (diff) | |
download | lwn-badf1f90502d3fc627a18880dfafd8c636699baf.tar.gz lwn-badf1f90502d3fc627a18880dfafd8c636699baf.zip |
Merge branch 'thermal-intel'
Merge thermal control changes related to Intel platforms for 6.3-rc1:
- Rework ACPI helper functions for thermal control to retrieve a trip
point temperature instead of initializing a trip point objetc (Rafael
Wysocki).
- Clean up and improve the int340x thermal driver ((Rafael Wysocki).
- Simplify and clean up the intel_pch thermal driver ((Rafael Wysocki).
- Fix the Intel powerclamp thermal driver and make it use the common
idle injection framework (Srinivas Pandruvada).
- Add two module parameters, cpumask and max_idle, to the Intel powerclamp
thermal driver to allow it to affect only a specific subset of CPUs
instead of all of them (Srinivas Pandruvada).
- Make the Intel quark_dts thermal driver Use generic trip point
objects instead of its own trip point representation (Daniel
Lezcano).
- Add toctree entry for thermal documents and fix two issues in the
Intel powerclamp driver documentation (Bagas Sanjaya).
* thermal-intel: (25 commits)
Documentation: powerclamp: Fix numbered lists formatting
Documentation: powerclamp: Escape wildcard in cpumask description
Documentation: admin-guide: Add toctree entry for thermal docs
thermal: intel: powerclamp: Add two module parameters
Documentation: admin-guide: Move intel_powerclamp documentation
thermal: intel: powerclamp: Fix duration module parameter
thermal: intel: powerclamp: Return last requested state as cur_state
thermal: intel: quark_dts: Use generic trip points
thermal: intel: powerclamp: Use powercap idle-inject feature
powercap: idle_inject: Add update callback
powercap: idle_inject: Export symbols
thermal: intel: powerclamp: Fix cur_state for multi package system
thermal: intel: intel_pch: Drop struct board_info
thermal: intel: intel_pch: Rename board ID symbols
thermal: intel: intel_pch: Fold suspend and resume routines into their callers
thermal: intel: intel_pch: Fold two functions into their callers
thermal: intel: intel_pch: Eliminate device operations object
thermal: intel: intel_pch: Rename device operations callbacks
thermal: intel: intel_pch: Eliminate redundant return pointers
thermal: intel: intel_pch: Make pch_wpt_add_acpi_psv_trip() return int
...
Diffstat (limited to 'drivers/thermal')
-rw-r--r-- | drivers/thermal/intel/Kconfig | 3 | ||||
-rw-r--r-- | drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c | 127 | ||||
-rw-r--r-- | drivers/thermal/intel/intel_pch_thermal.c | 352 | ||||
-rw-r--r-- | drivers/thermal/intel/intel_powerclamp.c | 545 | ||||
-rw-r--r-- | drivers/thermal/intel/intel_quark_dts_thermal.c | 55 | ||||
-rw-r--r-- | drivers/thermal/thermal_acpi.c | 108 |
6 files changed, 580 insertions, 610 deletions
diff --git a/drivers/thermal/intel/Kconfig b/drivers/thermal/intel/Kconfig index e50fd260484a..b5808f92702d 100644 --- a/drivers/thermal/intel/Kconfig +++ b/drivers/thermal/intel/Kconfig @@ -3,6 +3,9 @@ config INTEL_POWERCLAMP tristate "Intel PowerClamp idle injection driver" depends on X86 depends on CPU_SUP_INTEL + depends on CPU_IDLE + select POWERCAP + select IDLE_INJECT help Enable this to enable Intel PowerClamp idle injection driver. This enforce idle time which results in more package C-state residency. The diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c index 09b1b51eb6a5..00665967ca52 100644 --- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c +++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c @@ -29,24 +29,27 @@ static int int340x_thermal_get_zone_temp(struct thermal_zone_device *zone, if (conv_temp < 0) return conv_temp; - *temp = (unsigned long)conv_temp * 10; - } else + *temp = conv_temp * 10; + } else { /* _TMP returns the temperature in tenths of degrees Kelvin */ *temp = deci_kelvin_to_millicelsius(tmp); + } return 0; } static int int340x_thermal_set_trip_temp(struct thermal_zone_device *zone, - int trip, int temp) + int trip, int temp) { struct int34x_thermal_zone *d = zone->devdata; + char name[] = {'P', 'A', 'T', '0' + trip, '\0'}; acpi_status status; - char name[10]; - snprintf(name, sizeof(name), "PAT%d", trip); + if (trip > 9) + return -EINVAL; + status = acpi_execute_simple_method(d->adev->handle, name, - millicelsius_to_deci_kelvin(temp)); + millicelsius_to_deci_kelvin(temp)); if (ACPI_FAILURE(status)) return -EIO; @@ -70,24 +73,34 @@ static int int340x_thermal_read_trips(struct acpi_device *zone_adev, { int i, ret; - ret = thermal_acpi_trip_critical(zone_adev, &zone_trips[trip_cnt]); - if (!ret) + ret = thermal_acpi_critical_trip_temp(zone_adev, + &zone_trips[trip_cnt].temperature); + if (!ret) { + zone_trips[trip_cnt].type = THERMAL_TRIP_CRITICAL; trip_cnt++; + } - ret = thermal_acpi_trip_hot(zone_adev, &zone_trips[trip_cnt]); - if (!ret) + ret = thermal_acpi_hot_trip_temp(zone_adev, + &zone_trips[trip_cnt].temperature); + if (!ret) { + zone_trips[trip_cnt].type = THERMAL_TRIP_HOT; trip_cnt++; + } - ret = thermal_acpi_trip_passive(zone_adev, &zone_trips[trip_cnt]); - if (!ret) + ret = thermal_acpi_passive_trip_temp(zone_adev, + &zone_trips[trip_cnt].temperature); + if (!ret) { + zone_trips[trip_cnt].type = THERMAL_TRIP_PASSIVE; trip_cnt++; + } for (i = 0; i < INT340X_THERMAL_MAX_ACT_TRIP_COUNT; i++) { - - ret = thermal_acpi_trip_active(zone_adev, i, &zone_trips[trip_cnt]); + ret = thermal_acpi_active_trip_temp(zone_adev, i, + &zone_trips[trip_cnt].temperature); if (ret) break; + zone_trips[trip_cnt].type = THERMAL_TRIP_ACTIVE; trip_cnt++; } @@ -102,7 +115,7 @@ static struct thermal_zone_params int340x_thermal_params = { struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev, int (*get_temp) (struct thermal_zone_device *, int *)) { - struct int34x_thermal_zone *int34x_thermal_zone; + struct int34x_thermal_zone *int34x_zone; struct thermal_trip *zone_trips; unsigned long long trip_cnt = 0; unsigned long long hyst; @@ -110,26 +123,25 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev, acpi_status status; int i, ret; - int34x_thermal_zone = kzalloc(sizeof(*int34x_thermal_zone), - GFP_KERNEL); - if (!int34x_thermal_zone) + int34x_zone = kzalloc(sizeof(*int34x_zone), GFP_KERNEL); + if (!int34x_zone) return ERR_PTR(-ENOMEM); - int34x_thermal_zone->adev = adev; + int34x_zone->adev = adev; - int34x_thermal_zone->ops = kmemdup(&int340x_thermal_zone_ops, - sizeof(int340x_thermal_zone_ops), GFP_KERNEL); - if (!int34x_thermal_zone->ops) { + int34x_zone->ops = kmemdup(&int340x_thermal_zone_ops, + sizeof(int340x_thermal_zone_ops), GFP_KERNEL); + if (!int34x_zone->ops) { ret = -ENOMEM; goto err_ops_alloc; } if (get_temp) - int34x_thermal_zone->ops->get_temp = get_temp; + int34x_zone->ops->get_temp = get_temp; status = acpi_evaluate_integer(adev->handle, "PATC", NULL, &trip_cnt); - if (!ACPI_FAILURE(status)) { - int34x_thermal_zone->aux_trip_nr = trip_cnt; + if (ACPI_SUCCESS(status)) { + int34x_zone->aux_trip_nr = trip_cnt; trip_mask = BIT(trip_cnt) - 1; } @@ -156,49 +168,47 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev, for (i = 0; i < trip_cnt; ++i) zone_trips[i].hysteresis = hyst; - int34x_thermal_zone->trips = zone_trips; + int34x_zone->trips = zone_trips; - int34x_thermal_zone->lpat_table = acpi_lpat_get_conversion_table( - adev->handle); + int34x_zone->lpat_table = acpi_lpat_get_conversion_table(adev->handle); - int34x_thermal_zone->zone = thermal_zone_device_register_with_trips( - acpi_device_bid(adev), - zone_trips, trip_cnt, - trip_mask, int34x_thermal_zone, - int34x_thermal_zone->ops, - &int340x_thermal_params, - 0, 0); - if (IS_ERR(int34x_thermal_zone->zone)) { - ret = PTR_ERR(int34x_thermal_zone->zone); + int34x_zone->zone = thermal_zone_device_register_with_trips( + acpi_device_bid(adev), + zone_trips, trip_cnt, + trip_mask, int34x_zone, + int34x_zone->ops, + &int340x_thermal_params, + 0, 0); + if (IS_ERR(int34x_zone->zone)) { + ret = PTR_ERR(int34x_zone->zone); goto err_thermal_zone; } - ret = thermal_zone_device_enable(int34x_thermal_zone->zone); + ret = thermal_zone_device_enable(int34x_zone->zone); if (ret) goto err_enable; - return int34x_thermal_zone; + return int34x_zone; err_enable: - thermal_zone_device_unregister(int34x_thermal_zone->zone); + thermal_zone_device_unregister(int34x_zone->zone); err_thermal_zone: - kfree(int34x_thermal_zone->trips); - acpi_lpat_free_conversion_table(int34x_thermal_zone->lpat_table); + kfree(int34x_zone->trips); + acpi_lpat_free_conversion_table(int34x_zone->lpat_table); err_trips_alloc: - kfree(int34x_thermal_zone->ops); + kfree(int34x_zone->ops); err_ops_alloc: - kfree(int34x_thermal_zone); + kfree(int34x_zone); return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(int340x_thermal_zone_add); -void int340x_thermal_zone_remove(struct int34x_thermal_zone - *int34x_thermal_zone) +void int340x_thermal_zone_remove(struct int34x_thermal_zone *int34x_zone) { - thermal_zone_device_unregister(int34x_thermal_zone->zone); - acpi_lpat_free_conversion_table(int34x_thermal_zone->lpat_table); - kfree(int34x_thermal_zone->trips); - kfree(int34x_thermal_zone->ops); - kfree(int34x_thermal_zone); + thermal_zone_device_unregister(int34x_zone->zone); + acpi_lpat_free_conversion_table(int34x_zone->lpat_table); + kfree(int34x_zone->trips); + kfree(int34x_zone->ops); + kfree(int34x_zone); } EXPORT_SYMBOL_GPL(int340x_thermal_zone_remove); @@ -213,22 +223,21 @@ void int340x_thermal_update_trips(struct int34x_thermal_zone *int34x_zone) mutex_lock(&int34x_zone->zone->lock); for (i = int34x_zone->aux_trip_nr; i < trip_cnt; i++) { - struct thermal_trip trip; - int err; + int temp, err; switch (zone_trips[i].type) { case THERMAL_TRIP_CRITICAL: - err = thermal_acpi_trip_critical(zone_adev, &trip); + err = thermal_acpi_critical_trip_temp(zone_adev, &temp); break; case THERMAL_TRIP_HOT: - err = thermal_acpi_trip_hot(zone_adev, &trip); + err = thermal_acpi_hot_trip_temp(zone_adev, &temp); break; case THERMAL_TRIP_PASSIVE: - err = thermal_acpi_trip_passive(zone_adev, &trip); + err = thermal_acpi_passive_trip_temp(zone_adev, &temp); break; case THERMAL_TRIP_ACTIVE: - err = thermal_acpi_trip_active(zone_adev, act_trip_nr++, - &trip); + err = thermal_acpi_active_trip_temp(zone_adev, act_trip_nr++, + &temp); break; default: err = -ENODEV; @@ -238,7 +247,7 @@ void int340x_thermal_update_trips(struct int34x_thermal_zone *int34x_zone) continue; } - zone_trips[i].temperature = trip.temperature; + zone_trips[i].temperature = temp; } mutex_unlock(&int34x_zone->zone->lock); diff --git a/drivers/thermal/intel/intel_pch_thermal.c b/drivers/thermal/intel/intel_pch_thermal.c index 45a9ea86907e..b855d031a855 100644 --- a/drivers/thermal/intel/intel_pch_thermal.c +++ b/drivers/thermal/intel/intel_pch_thermal.c @@ -82,7 +82,6 @@ static char driver_name[] = "Intel PCH thermal driver"; struct pch_thermal_device { void __iomem *hw_base; - const struct pch_dev_ops *ops; struct pci_dev *pdev; struct thermal_zone_device *tzd; struct thermal_trip trips[PCH_MAX_TRIPS]; @@ -90,42 +89,107 @@ struct pch_thermal_device { }; #ifdef CONFIG_ACPI - /* * On some platforms, there is a companion ACPI device, which adds * passive trip temperature using _PSV method. There is no specific * passive temperature setting in MMIO interface of this PCI device. */ -static void pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, - int *nr_trips) +static int pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, int trip) { struct acpi_device *adev; - int ret; + int temp; adev = ACPI_COMPANION(&ptd->pdev->dev); if (!adev) - return; + return 0; - ret = thermal_acpi_trip_passive(adev, &ptd->trips[*nr_trips]); - if (ret || ptd->trips[*nr_trips].temperature <= 0) - return; + if (thermal_acpi_passive_trip_temp(adev, &temp) || temp <= 0) + return 0; - ++(*nr_trips); + ptd->trips[trip].type = THERMAL_TRIP_PASSIVE; + ptd->trips[trip].temperature = temp; + return 1; } #else -static void pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, - int *nr_trips) +static int pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, int trip) { - + return 0; } #endif -static int pch_wpt_init(struct pch_thermal_device *ptd, int *nr_trips) +static int pch_thermal_get_temp(struct thermal_zone_device *tzd, int *temp) { - u8 tsel; + struct pch_thermal_device *ptd = tzd->devdata; + + *temp = GET_WPT_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP)); + return 0; +} + +static void pch_critical(struct thermal_zone_device *tzd) +{ + dev_dbg(&tzd->device, "%s: critical temperature reached\n", tzd->type); +} + +static struct thermal_zone_device_ops tzd_ops = { + .get_temp = pch_thermal_get_temp, + .critical = pch_critical, +}; + +enum pch_board_ids { + PCH_BOARD_HSW = 0, + PCH_BOARD_WPT, + PCH_BOARD_SKL, + PCH_BOARD_CNL, + PCH_BOARD_CML, + PCH_BOARD_LWB, + PCH_BOARD_WBG, +}; + +static const char *board_names[] = { + [PCH_BOARD_HSW] = "pch_haswell", + [PCH_BOARD_WPT] = "pch_wildcat_point", + [PCH_BOARD_SKL] = "pch_skylake", + [PCH_BOARD_CNL] = "pch_cannonlake", + [PCH_BOARD_CML] = "pch_cometlake", + [PCH_BOARD_LWB] = "pch_lewisburg", + [PCH_BOARD_WBG] = "pch_wellsburg", +}; + +static int intel_pch_thermal_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + enum pch_board_ids board_id = id->driver_data; + struct pch_thermal_device *ptd; + int nr_trips = 0; u16 trip_temp; + u8 tsel; + int err; + + ptd = devm_kzalloc(&pdev->dev, sizeof(*ptd), GFP_KERNEL); + if (!ptd) + return -ENOMEM; + + pci_set_drvdata(pdev, ptd); + ptd->pdev = pdev; + + err = pci_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "failed to enable pci device\n"); + return err; + } + + err = pci_request_regions(pdev, driver_name); + if (err) { + dev_err(&pdev->dev, "failed to request pci region\n"); + goto error_disable; + } - *nr_trips = 0; + ptd->hw_base = pci_ioremap_bar(pdev, 0); + if (!ptd->hw_base) { + err = -ENOMEM; + dev_err(&pdev->dev, "failed to map mem base\n"); + goto error_release; + } /* Check if BIOS has already enabled thermal sensor */ if (WPT_TSEL_ETS & readb(ptd->hw_base + WPT_TSEL)) { @@ -140,50 +204,79 @@ static int pch_wpt_init(struct pch_thermal_device *ptd, int *nr_trips) */ if (tsel & WPT_TSEL_PLDB) { dev_err(&ptd->pdev->dev, "Sensor can't be enabled\n"); - return -ENODEV; + err = -ENODEV; + goto error_cleanup; } writeb(tsel|WPT_TSEL_ETS, ptd->hw_base + WPT_TSEL); if (!(WPT_TSEL_ETS & readb(ptd->hw_base + WPT_TSEL))) { dev_err(&ptd->pdev->dev, "Sensor can't be enabled\n"); - return -ENODEV; + err = -ENODEV; + goto error_cleanup; } read_trips: trip_temp = readw(ptd->hw_base + WPT_CTT); trip_temp &= 0x1FF; if (trip_temp) { - ptd->trips[*nr_trips].temperature = GET_WPT_TEMP(trip_temp); - ptd->trips[*nr_trips].type = THERMAL_TRIP_CRITICAL; - ++(*nr_trips); + ptd->trips[nr_trips].temperature = GET_WPT_TEMP(trip_temp); + ptd->trips[nr_trips++].type = THERMAL_TRIP_CRITICAL; } trip_temp = readw(ptd->hw_base + WPT_PHL); trip_temp &= 0x1FF; if (trip_temp) { - ptd->trips[*nr_trips].temperature = GET_WPT_TEMP(trip_temp); - ptd->trips[*nr_trips].type = THERMAL_TRIP_HOT; - ++(*nr_trips); + ptd->trips[nr_trips].temperature = GET_WPT_TEMP(trip_temp); + ptd->trips[nr_trips++].type = THERMAL_TRIP_HOT; } - pch_wpt_add_acpi_psv_trip(ptd, nr_trips); + nr_trips += pch_wpt_add_acpi_psv_trip(ptd, nr_trips); + + ptd->tzd = thermal_zone_device_register_with_trips(board_names[board_id], + ptd->trips, nr_trips, + 0, ptd, &tzd_ops, + NULL, 0, 0); + if (IS_ERR(ptd->tzd)) { + dev_err(&pdev->dev, "Failed to register thermal zone %s\n", + board_names[board_id]); + err = PTR_ERR(ptd->tzd); + goto error_cleanup; + } + err = thermal_zone_device_enable(ptd->tzd); + if (err) + goto err_unregister; return 0; + +err_unregister: + thermal_zone_device_unregister(ptd->tzd); +error_cleanup: + iounmap(ptd->hw_base); +error_release: + pci_release_regions(pdev); +error_disable: + pci_disable_device(pdev); + dev_err(&pdev->dev, "pci device failed to probe\n"); + return err; } -static int pch_wpt_get_temp(struct pch_thermal_device *ptd, int *temp) +static void intel_pch_thermal_remove(struct pci_dev *pdev) { - *temp = GET_WPT_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP)); + struct pch_thermal_device *ptd = pci_get_drvdata(pdev); - return 0; + thermal_zone_device_unregister(ptd->tzd); + iounmap(ptd->hw_base); + pci_set_drvdata(pdev, NULL); + pci_release_regions(pdev); + pci_disable_device(pdev); } -/* Cool the PCH when it's overheat in .suspend_noirq phase */ -static int pch_wpt_suspend(struct pch_thermal_device *ptd) +static int intel_pch_thermal_suspend_noirq(struct device *device) { - u8 tsel; - int pch_delay_cnt = 0; + struct pch_thermal_device *ptd = dev_get_drvdata(device); u16 pch_thr_temp, pch_cur_temp; + int pch_delay_cnt = 0; + u8 tsel; /* Shutdown the thermal sensor if it is not enabled by BIOS */ if (!ptd->bios_enabled) { @@ -246,8 +339,9 @@ static int pch_wpt_suspend(struct pch_thermal_device *ptd) return 0; } -static int pch_wpt_resume(struct pch_thermal_device *ptd) +static int intel_pch_thermal_resume(struct device *device) { + struct pch_thermal_device *ptd = dev_get_drvdata(device); u8 tsel; if (ptd->bios_enabled) @@ -260,199 +354,29 @@ static int pch_wpt_resume(struct pch_thermal_device *ptd) return 0; } -struct pch_dev_ops { - int (*hw_init)(struct pch_thermal_device *ptd, int *nr_trips); - int (*get_temp)(struct pch_thermal_device *ptd, int *temp); - int (*suspend)(struct pch_thermal_device *ptd); - int (*resume)(struct pch_thermal_device *ptd); -}; - - -/* dev ops for Wildcat Point */ -static const struct pch_dev_ops pch_dev_ops_wpt = { - .hw_init = pch_wpt_init, - .get_temp = pch_wpt_get_temp, - .suspend = pch_wpt_suspend, - .resume = pch_wpt_resume, -}; - -static int pch_thermal_get_temp(struct thermal_zone_device *tzd, int *temp) -{ - struct pch_thermal_device *ptd = tzd->devdata; - - return ptd->ops->get_temp(ptd, temp); -} - -static void pch_critical(struct thermal_zone_device *tzd) -{ - dev_dbg(&tzd->device, "%s: critical temperature reached\n", tzd->type); -} - -static struct thermal_zone_device_ops tzd_ops = { - .get_temp = pch_thermal_get_temp, - .critical = pch_critical, -}; - -enum board_ids { - board_hsw, - board_wpt, - board_skl, - board_cnl, - board_cml, - board_lwb, - board_wbg, -}; - -static const struct board_info { - const char *name; - const struct pch_dev_ops *ops; -} board_info[] = { - [board_hsw] = { - .name = "pch_haswell", - .ops = &pch_dev_ops_wpt, - }, - [board_wpt] = { - .name = "pch_wildcat_point", - .ops = &pch_dev_ops_wpt, - }, - [board_skl] = { - .name = "pch_skylake", - .ops = &pch_dev_ops_wpt, - }, - [board_cnl] = { - .name = "pch_cannonlake", - .ops = &pch_dev_ops_wpt, - }, - [board_cml] = { - .name = "pch_cometlake", - .ops = &pch_dev_ops_wpt, - }, - [board_lwb] = { - .name = "pch_lewisburg", - .ops = &pch_dev_ops_wpt, - }, - [board_wbg] = { - .name = "pch_wellsburg", - .ops = &pch_dev_ops_wpt, - }, -}; - -static int intel_pch_thermal_probe(struct pci_dev *pdev, - const struct pci_device_id *id) -{ - enum board_ids board_id = id->driver_data; - const struct board_info *bi = &board_info[board_id]; - struct pch_thermal_device *ptd; - int err; - int nr_trips; - - ptd = devm_kzalloc(&pdev->dev, sizeof(*ptd), GFP_KERNEL); - if (!ptd) - return -ENOMEM; - - ptd->ops = bi->ops; - - pci_set_drvdata(pdev, ptd); - ptd->pdev = pdev; - - err = pci_enable_device(pdev); - if (err) { - dev_err(&pdev->dev, "failed to enable pci device\n"); - return err; - } - - err = pci_request_regions(pdev, driver_name); - if (err) { - dev_err(&pdev->dev, "failed to request pci region\n"); - goto error_disable; - } - - ptd->hw_base = pci_ioremap_bar(pdev, 0); - if (!ptd->hw_base) { - err = -ENOMEM; - dev_err(&pdev->dev, "failed to map mem base\n"); - goto error_release; - } - - err = ptd->ops->hw_init(ptd, &nr_trips); - if (err) - goto error_cleanup; - - ptd->tzd = thermal_zone_device_register_with_trips(bi->name, ptd->trips, - nr_trips, 0, ptd, - &tzd_ops, NULL, 0, 0); - if (IS_ERR(ptd->tzd)) { - dev_err(&pdev->dev, "Failed to register thermal zone %s\n", - bi->name); - err = PTR_ERR(ptd->tzd); - goto error_cleanup; - } - err = thermal_zone_device_enable(ptd->tzd); - if (err) - goto err_unregister; - - return 0; - -err_unregister: - thermal_zone_device_unregister(ptd->tzd); -error_cleanup: - iounmap(ptd->hw_base); -error_release: - pci_release_regions(pdev); -error_disable: - pci_disable_device(pdev); - dev_err(&pdev->dev, "pci device failed to probe\n"); - return err; -} - -static void intel_pch_thermal_remove(struct pci_dev *pdev) -{ - struct pch_thermal_device *ptd = pci_get_drvdata(pdev); - - thermal_zone_device_unregister(ptd->tzd); - iounmap(ptd->hw_base); - pci_set_drvdata(pdev, NULL); - pci_release_regions(pdev); - pci_disable_device(pdev); -} - -static int intel_pch_thermal_suspend_noirq(struct device *device) -{ - struct pch_thermal_device *ptd = dev_get_drvdata(device); - - return ptd->ops->suspend(ptd); -} - -static int intel_pch_thermal_resume(struct device *device) -{ - struct pch_thermal_device *ptd = dev_get_drvdata(device); - - return ptd->ops->resume(ptd); -} - static const struct pci_device_id intel_pch_thermal_id[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_HSW_1), - .driver_data = board_hsw, }, + .driver_data = PCH_BOARD_HSW, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_HSW_2), - .driver_data = board_hsw, }, + .driver_data = PCH_BOARD_HSW, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_WPT), - .driver_data = board_wpt, }, + .driver_data = PCH_BOARD_WPT, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_SKL), - .driver_data = board_skl, }, + .driver_data = PCH_BOARD_SKL, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_SKL_H), - .driver_data = board_skl, }, + .driver_data = PCH_BOARD_SKL, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_CNL), - .driver_data = board_cnl, }, + .driver_data = PCH_BOARD_CNL, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_CNL_H), - .driver_data = board_cnl, }, + .driver_data = PCH_BOARD_CNL, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_CNL_LP), - .driver_data = board_cnl, }, + .driver_data = PCH_BOARD_CNL, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_CML_H), - .driver_data = board_cml, }, + .driver_data = PCH_BOARD_CML, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_LWB), - .driver_data = board_lwb, }, + .driver_data = PCH_BOARD_LWB, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_WBG), - .driver_data = board_wbg, }, + .driver_data = PCH_BOARD_WBG, }, { 0, }, }; MODULE_DEVICE_TABLE(pci, intel_pch_thermal_id); diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index b80e25ec1261..c7ba5680cd48 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -2,7 +2,7 @@ /* * intel_powerclamp.c - package c-state idle injection * - * Copyright (c) 2012, Intel Corporation. + * Copyright (c) 2012-2023, Intel Corporation. * * Authors: * Arjan van de Ven <arjan@linux.intel.com> @@ -27,23 +27,17 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/delay.h> -#include <linux/kthread.h> #include <linux/cpu.h> #include <linux/thermal.h> -#include <linux/slab.h> -#include <linux/tick.h> #include <linux/debugfs.h> #include <linux/seq_file.h> -#include <linux/sched/rt.h> -#include <uapi/linux/sched/types.h> +#include <linux/idle_inject.h> -#include <asm/nmi.h> #include <asm/msr.h> #include <asm/mwait.h> #include <asm/cpu_device_id.h> -#include <asm/hardirq.h> -#define MAX_TARGET_RATIO (50U) +#define MAX_TARGET_RATIO (100U) /* For each undisturbed clamping period (no extra wake ups during idle time), * we increment the confidence counter for the given target ratio. * CONFIDENCE_OK defines the level where runtime calibration results are @@ -57,37 +51,30 @@ static unsigned int target_mwait; static struct dentry *debug_dir; +static bool poll_pkg_cstate_enable; -/* user selected target */ -static unsigned int set_target_ratio; +/* Idle ratio observed using package C-state counters */ static unsigned int current_ratio; -static bool should_skip; -static unsigned int control_cpu; /* The cpu assigned to collect stat and update - * control parameters. default to BSP but BSP - * can be offlined. - */ -static bool clamping; +/* Skip the idle injection till set to true */ +static bool should_skip; -struct powerclamp_worker_data { - struct kthread_worker *worker; - struct kthread_work balancing_work; - struct kthread_delayed_work idle_injection_work; +struct powerclamp_data { unsigned int cpu; unsigned int count; unsigned int guard; unsigned int window_size_now; unsigned int target_ratio; - unsigned int duration_jiffies; bool clamping; }; -static struct powerclamp_worker_data __percpu *worker_data; +static struct powerclamp_data powerclamp_data; + static struct thermal_cooling_device *cooling_dev; -static unsigned long *cpu_clamping_mask; /* bit map for tracking per cpu - * clamping kthread worker - */ +static DEFINE_MUTEX(powerclamp_lock); + +/* This duration is in microseconds */ static unsigned int duration; static unsigned int pkg_cstate_ratio_cur; static unsigned int window_size; @@ -104,25 +91,171 @@ static int duration_set(const char *arg, const struct kernel_param *kp) pr_err("Out of recommended range %lu, between 6-25ms\n", new_duration); ret = -EINVAL; + goto exit; } - duration = clamp(new_duration, 6ul, 25ul); - smp_mb(); - + mutex_lock(&powerclamp_lock); + duration = clamp(new_duration, 6ul, 25ul) * 1000; + mutex_unlock(&powerclamp_lock); exit: return ret; } +static int duration_get(char *buf, const struct kernel_param *kp) +{ + int ret; + + mutex_lock(&powerclamp_lock); + ret = sysfs_emit(buf, "%d\n", duration / 1000); + mutex_unlock(&powerclamp_lock); + + return ret; +} + static const struct kernel_param_ops duration_ops = { .set = duration_set, - .get = param_get_int, + .get = duration_get, }; - -module_param_cb(duration, &duration_ops, &duration, 0644); +module_param_cb(duration, &duration_ops, NULL, 0644); MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec."); +#define DEFAULT_MAX_IDLE 50 +#define MAX_ALL_CPU_IDLE 75 + +static u8 max_idle = DEFAULT_MAX_IDLE; + +static cpumask_var_t idle_injection_cpu_mask; + +static int allocate_copy_idle_injection_mask(const struct cpumask *copy_mask) +{ + if (cpumask_available(idle_injection_cpu_mask)) + goto copy_mask; + + /* This mask is allocated only one time and freed during module exit */ + if (!alloc_cpumask_var(&idle_injection_cpu_mask, GFP_KERNEL)) + return -ENOMEM; + +copy_mask: + cpumask_copy(idle_injection_cpu_mask, copy_mask); + + return 0; +} + +/* Return true if the cpumask and idle percent combination is invalid */ +static bool check_invalid(cpumask_var_t mask, u8 idle) +{ + if (cpumask_equal(cpu_present_mask, mask) && idle > MAX_ALL_CPU_IDLE) + return true; + + return false; +} + +static int cpumask_set(const char *arg, const struct kernel_param *kp) +{ + cpumask_var_t new_mask; + int ret; + + mutex_lock(&powerclamp_lock); + + /* Can't set mask when cooling device is in use */ + if (powerclamp_data.clamping) { + ret = -EAGAIN; + goto skip_cpumask_set; + } + + ret = alloc_cpumask_var(&new_mask, GFP_KERNEL); + if (!ret) + goto skip_cpumask_set; + + ret = bitmap_parse(arg, strlen(arg), cpumask_bits(new_mask), + nr_cpumask_bits); + if (ret) + goto free_cpumask_set; + + if (cpumask_empty(new_mask) || check_invalid(new_mask, max_idle)) { + ret = -EINVAL; + goto free_cpumask_set; + } + + /* + * When module parameters are passed from kernel command line + * during insmod, the module parameter callback is called + * before powerclamp_init(), so we can't assume that some + * cpumask can be allocated and copied before here. Also + * in this case this cpumask is used as the default mask. + */ + ret = allocate_copy_idle_injection_mask(new_mask); + +free_cpumask_set: + free_cpumask_var(new_mask); +skip_cpumask_set: + mutex_unlock(&powerclamp_lock); + + return ret; +} + +static int cpumask_get(char *buf, const struct kernel_param *kp) +{ + if (!cpumask_available(idle_injection_cpu_mask)) + return -ENODEV; + + return bitmap_print_to_pagebuf(false, buf, cpumask_bits(idle_injection_cpu_mask), + nr_cpumask_bits); +} + +static const struct kernel_param_ops cpumask_ops = { + .set = cpumask_set, + .get = cpumask_get, +}; + +module_param_cb(cpumask, &cpumask_ops, NULL, 0644); +MODULE_PARM_DESC(cpumask, "Mask of CPUs to use for idle injection."); + +static int max_idle_set(const char *arg, const struct kernel_param *kp) +{ + u8 new_max_idle; + int ret = 0; + + mutex_lock(&powerclamp_lock); + + /* Can't set mask when cooling device is in use */ + if (powerclamp_data.clamping) { + ret = -EAGAIN; + goto skip_limit_set; + } + + ret = kstrtou8(arg, 10, &new_max_idle); + if (ret) + goto skip_limit_set; + + if (new_max_idle > MAX_TARGET_RATIO) { + ret = -EINVAL; + goto skip_limit_set; + } + + if (check_invalid(idle_injection_cpu_mask, new_max_idle)) { + ret = -EINVAL; + goto skip_limit_set; + } + + max_idle = new_max_idle; + +skip_limit_set: + mutex_unlock(&powerclamp_lock); + + return ret; +} + +static const struct kernel_param_ops max_idle_ops = { + .set = max_idle_set, + .get = param_get_int, +}; + +module_param_cb(max_idle, &max_idle_ops, &max_idle, 0644); +MODULE_PARM_DESC(max_idle, "maximum injected idle time to the total CPU time ratio in percent range:1-100"); + struct powerclamp_calibration_data { unsigned long confidence; /* used for calibration, basically a counter * gets incremented each time a clamping @@ -261,6 +394,9 @@ static unsigned int get_compensation(int ratio) { unsigned int comp = 0; + if (!poll_pkg_cstate_enable) + return 0; + /* we only use compensation if all adjacent ones are good */ if (ratio == 1 && cal_data[ratio].confidence >= CONFIDENCE_OK && @@ -302,7 +438,7 @@ static void adjust_compensation(int target_ratio, unsigned int win) if (d->confidence >= CONFIDENCE_OK) return; - delta = set_target_ratio - current_ratio; + delta = powerclamp_data.target_ratio - current_ratio; /* filter out bad data */ if (delta >= 0 && delta <= (1+target_ratio/10)) { if (d->steady_comp) @@ -341,82 +477,39 @@ static bool powerclamp_adjust_controls(unsigned int target_ratio, adjust_compensation(target_ratio, win); /* if we are above target+guard, skip */ - return set_target_ratio + guard <= current_ratio; + return powerclamp_data.target_ratio + guard <= current_ratio; } -static void clamp_balancing_func(struct kthread_work *work) +/* + * This function calculates runtime from the current target ratio. + * This function gets called under powerclamp_lock. + */ +static unsigned int get_run_time(void) { - struct powerclamp_worker_data *w_data; - int sleeptime; - unsigned long target_jiffies; unsigned int compensated_ratio; - int interval; /* jiffies to sleep for each attempt */ - - w_data = container_of(work, struct powerclamp_worker_data, - balancing_work); + unsigned int runtime; /* * make sure user selected ratio does not take effect until * the next round. adjust target_ratio if user has changed * target such that we can converge quickly. */ - w_data->target_ratio = READ_ONCE(set_target_ratio); - w_data->guard = 1 + w_data->target_ratio / 20; - w_data->window_size_now = window_size; - w_data->duration_jiffies = msecs_to_jiffies(duration); - w_data->count++; + powerclamp_data.guard = 1 + powerclamp_data.target_ratio / 20; + powerclamp_data.window_size_now = window_size; /* * systems may have different ability to enter package level * c-states, thus we need to compensate the injected idle ratio * to achieve the actual target reported by the HW. */ - compensated_ratio = w_data->target_ratio + - get_compensation(w_data->target_ratio); + compensated_ratio = powerclamp_data.target_ratio + + get_compensation(powerclamp_data.target_ratio); if (compensated_ratio <= 0) compensated_ratio = 1; - interval = w_data->duration_jiffies * 100 / compensated_ratio; - - /* align idle time */ - target_jiffies = roundup(jiffies, interval); - sleeptime = target_jiffies - jiffies; - if (sleeptime <= 0) - sleeptime = 1; - - if (clamping && w_data->clamping && cpu_online(w_data->cpu)) - kthread_queue_delayed_work(w_data->worker, - &w_data->idle_injection_work, - sleeptime); -} -static void clamp_idle_injection_func(struct kthread_work *work) -{ - struct powerclamp_worker_data *w_data; - - w_data = container_of(work, struct powerclamp_worker_data, - idle_injection_work.work); - - /* - * only elected controlling cpu can collect stats and update - * control parameters. - */ - if (w_data->cpu == control_cpu && - !(w_data->count % w_data->window_size_now)) { - should_skip = - powerclamp_adjust_controls(w_data->target_ratio, - w_data->guard, - w_data->window_size_now); - smp_mb(); - } + runtime = duration * 100 / compensated_ratio - duration; - if (should_skip) - goto balance; - - play_idle(jiffies_to_usecs(w_data->duration_jiffies)); - -balance: - if (clamping && w_data->clamping && cpu_online(w_data->cpu)) - kthread_queue_work(w_data->worker, &w_data->balancing_work); + return runtime; } /* @@ -452,126 +545,129 @@ static void poll_pkg_cstate(struct work_struct *dummy) msr_last = msr_now; tsc_last = tsc_now; - if (true == clamping) + mutex_lock(&powerclamp_lock); + if (powerclamp_data.clamping) schedule_delayed_work(&poll_pkg_cstate_work, HZ); + mutex_unlock(&powerclamp_lock); } -static void start_power_clamp_worker(unsigned long cpu) +static struct idle_inject_device *ii_dev; + +/* + * This function is called from idle injection core on timer expiry + * for the run duration. This allows powerclamp to readjust or skip + * injecting idle for this cycle. + */ +static bool idle_inject_update(void) { - struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu); - struct kthread_worker *worker; + bool update = false; - worker = kthread_create_worker_on_cpu(cpu, 0, "kidle_inj/%ld", cpu); - if (IS_ERR(worker)) - return; + /* We can't sleep in this callback */ + if (!mutex_trylock(&powerclamp_lock)) + return true; - w_data->worker = worker; - w_data->count = 0; - w_data->cpu = cpu; - w_data->clamping = true; - set_bit(cpu, cpu_clamping_mask); - sched_set_fifo(worker->task); - kthread_init_work(&w_data->balancing_work, clamp_balancing_func); - kthread_init_delayed_work(&w_data->idle_injection_work, - clamp_idle_injection_func); - kthread_queue_work(w_data->worker, &w_data->balancing_work); -} + if (!(powerclamp_data.count % powerclamp_data.window_size_now)) { -static void stop_power_clamp_worker(unsigned long cpu) -{ - struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu); + should_skip = powerclamp_adjust_controls(powerclamp_data.target_ratio, + powerclamp_data.guard, + powerclamp_data.window_size_now); + update = true; + } - if (!w_data->worker) - return; + if (update) { + unsigned int runtime = get_run_time(); - w_data->clamping = false; - /* - * Make sure that all works that get queued after this point see - * the clamping disabled. The counter part is not needed because - * there is an implicit memory barrier when the queued work - * is proceed. - */ - smp_wmb(); - kthread_cancel_work_sync(&w_data->balancing_work); - kthread_cancel_delayed_work_sync(&w_data->idle_injection_work); - /* - * The balancing work still might be queued here because - * the handling of the "clapming" variable, cancel, and queue - * operations are not synchronized via a lock. But it is not - * a big deal. The balancing work is fast and destroy kthread - * will wait for it. - */ - clear_bit(w_data->cpu, cpu_clamping_mask); - kthread_destroy_worker(w_data->worker); + idle_inject_set_duration(ii_dev, runtime, duration); + } + + powerclamp_data.count++; + + mutex_unlock(&powerclamp_lock); + + if (should_skip) + return false; - w_data->worker = NULL; + return true; } -static int start_power_clamp(void) +/* This function starts idle injection by calling idle_inject_start() */ +static void trigger_idle_injection(void) { - unsigned long cpu; - - set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1); - /* prevent cpu hotplug */ - cpus_read_lock(); + unsigned int runtime = get_run_time(); - /* prefer BSP */ - control_cpu = cpumask_first(cpu_online_mask); + idle_inject_set_duration(ii_dev, runtime, duration); + idle_inject_start(ii_dev); + powerclamp_data.clamping = true; +} - clamping = true; - schedule_delayed_work(&poll_pkg_cstate_work, 0); +/* + * This function is called from start_power_clamp() to register + * CPUS with powercap idle injection register and set default + * idle duration and latency. + */ +static int powerclamp_idle_injection_register(void) +{ + poll_pkg_cstate_enable = false; + if (cpumask_equal(cpu_present_mask, idle_injection_cpu_mask)) { + ii_dev = idle_inject_register_full(idle_injection_cpu_mask, idle_inject_update); + if (topology_max_packages() == 1 && topology_max_die_per_package() == 1) + poll_pkg_cstate_enable = true; + } else { + ii_dev = idle_inject_register(idle_injection_cpu_mask); + } - /* start one kthread worker per online cpu */ - for_each_online_cpu(cpu) { - start_power_clamp_worker(cpu); + if (!ii_dev) { + pr_err("powerclamp: idle_inject_register failed\n"); + return -EAGAIN; } - cpus_read_unlock(); + + idle_inject_set_duration(ii_dev, TICK_USEC, duration); + idle_inject_set_latency(ii_dev, UINT_MAX); return 0; } -static void end_power_clamp(void) +/* + * This function is called from end_power_clamp() to stop idle injection + * and unregister CPUS from powercap idle injection core. + */ +static void remove_idle_injection(void) { - int i; + if (!powerclamp_data.clamping) + return; - /* - * Block requeuing in all the kthread workers. They will flush and - * stop faster. - */ - clamping = false; - for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) { - pr_debug("clamping worker for cpu %d alive, destroy\n", i); - stop_power_clamp_worker(i); - } + powerclamp_data.clamping = false; + idle_inject_stop(ii_dev); } -static int powerclamp_cpu_online(unsigned int cpu) +/* + * This function is called when user change the cooling device + * state from zero to some other value. + */ +static int start_power_clamp(void) { - if (clamping == false) - return 0; - start_power_clamp_worker(cpu); - /* prefer BSP as controlling CPU */ - if (cpu == 0) { - control_cpu = 0; - smp_mb(); + int ret; + + ret = powerclamp_idle_injection_register(); + if (!ret) { + trigger_idle_injection(); + if (poll_pkg_cstate_enable) + schedule_delayed_work(&poll_pkg_cstate_work, 0); } - return 0; + + return ret; } -static int powerclamp_cpu_predown(unsigned int cpu) +/* + * This function is called when user change the cooling device + * state from non zero value zero. + */ +static void end_power_clamp(void) { - if (clamping == false) - return 0; - - stop_power_clamp_worker(cpu); - if (cpu != control_cpu) - return 0; - - control_cpu = cpumask_first(cpu_online_mask); - if (control_cpu == cpu) - control_cpu = cpumask_next(cpu, cpu_online_mask); - smp_mb(); - return 0; + if (powerclamp_data.clamping) { + remove_idle_injection(); + idle_inject_unregister(ii_dev); + } } static int powerclamp_get_max_state(struct thermal_cooling_device *cdev, @@ -585,11 +681,9 @@ static int powerclamp_get_max_state(struct thermal_cooling_device *cdev, static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev, unsigned long *state) { - if (true == clamping) - *state = pkg_cstate_ratio_cur; - else - /* to save power, do not poll idle ratio while not clamping */ - *state = -1; /* indicates invalid state */ + mutex_lock(&powerclamp_lock); + *state = powerclamp_data.target_ratio; + mutex_unlock(&powerclamp_lock); return 0; } @@ -599,24 +693,32 @@ static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev, { int ret = 0; + mutex_lock(&powerclamp_lock); + new_target_ratio = clamp(new_target_ratio, 0UL, - (unsigned long) (MAX_TARGET_RATIO-1)); - if (set_target_ratio == 0 && new_target_ratio > 0) { + (unsigned long) (max_idle - 1)); + if (!powerclamp_data.target_ratio && new_target_ratio > 0) { pr_info("Start idle injection to reduce power\n"); - set_target_ratio = new_target_ratio; + powerclamp_data.target_ratio = new_target_ratio; ret = start_power_clamp(); + if (ret) + powerclamp_data.target_ratio = 0; goto exit_set; - } else if (set_target_ratio > 0 && new_target_ratio == 0) { + } else if (powerclamp_data.target_ratio > 0 && new_target_ratio == 0) { pr_info("Stop forced idle injection\n"); end_power_clamp(); - set_target_ratio = 0; + powerclamp_data.target_ratio = 0; } else /* adjust currently running */ { - set_target_ratio = new_target_ratio; - /* make new set_target_ratio visible to other cpus */ - smp_mb(); + unsigned int runtime; + + powerclamp_data.target_ratio = new_target_ratio; + runtime = get_run_time(); + idle_inject_set_duration(ii_dev, runtime, duration); } exit_set: + mutex_unlock(&powerclamp_lock); + return ret; } @@ -657,7 +759,6 @@ static int powerclamp_debug_show(struct seq_file *m, void *unused) { int i = 0; - seq_printf(m, "controlling cpu: %d\n", control_cpu); seq_printf(m, "pct confidence steady dynamic (compensation)\n"); for (i = 0; i < MAX_TARGET_RATIO; i++) { seq_printf(m, "%d\t%lu\t%lu\t%lu\n", @@ -680,75 +781,57 @@ static inline void powerclamp_create_debug_files(void) &powerclamp_debug_fops); } -static enum cpuhp_state hp_state; - static int __init powerclamp_init(void) { int retval; - cpu_clamping_mask = bitmap_zalloc(num_possible_cpus(), GFP_KERNEL); - if (!cpu_clamping_mask) - return -ENOMEM; - /* probe cpu features and ids here */ retval = powerclamp_probe(); if (retval) - goto exit_free; + return retval; + + mutex_lock(&powerclamp_lock); + retval = allocate_copy_idle_injection_mask(cpu_present_mask); + mutex_unlock(&powerclamp_lock); + + if (retval) + return retval; /* set default limit, maybe adjusted during runtime based on feedback */ window_size = 2; - retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, - "thermal/intel_powerclamp:online", - powerclamp_cpu_online, - powerclamp_cpu_predown); - if (retval < 0) - goto exit_free; - - hp_state = retval; - - worker_data = alloc_percpu(struct powerclamp_worker_data); - if (!worker_data) { - retval = -ENOMEM; - goto exit_unregister; - } cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL, - &powerclamp_cooling_ops); - if (IS_ERR(cooling_dev)) { - retval = -ENODEV; - goto exit_free_thread; - } + &powerclamp_cooling_ops); + if (IS_ERR(cooling_dev)) + return -ENODEV; if (!duration) - duration = jiffies_to_msecs(DEFAULT_DURATION_JIFFIES); + duration = jiffies_to_usecs(DEFAULT_DURATION_JIFFIES); powerclamp_create_debug_files(); return 0; - -exit_free_thread: - free_percpu(worker_data); -exit_unregister: - cpuhp_remove_state_nocalls(hp_state); -exit_free: - bitmap_free(cpu_clamping_mask); - return retval; } module_init(powerclamp_init); static void __exit powerclamp_exit(void) { + mutex_lock(&powerclamp_lock); end_power_clamp(); - cpuhp_remove_state_nocalls(hp_state); - free_percpu(worker_data); + mutex_unlock(&powerclamp_lock); + thermal_cooling_device_unregister(cooling_dev); - bitmap_free(cpu_clamping_mask); cancel_delayed_work_sync(&poll_pkg_cstate_work); debugfs_remove_recursive(debug_dir); + + if (cpumask_available(idle_injection_cpu_mask)) + free_cpumask_var(idle_injection_cpu_mask); } module_exit(powerclamp_exit); +MODULE_IMPORT_NS(IDLE_INJECT); + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@linux.intel.com>"); diff --git a/drivers/thermal/intel/intel_quark_dts_thermal.c b/drivers/thermal/intel/intel_quark_dts_thermal.c index 3eafc6b0e6c3..97b843fa7568 100644 --- a/drivers/thermal/intel/intel_quark_dts_thermal.c +++ b/drivers/thermal/intel/intel_quark_dts_thermal.c @@ -84,6 +84,7 @@ #define QRK_DTS_MASK_TP_THRES 0xFF #define QRK_DTS_SHIFT_TP 8 #define QRK_DTS_ID_TP_CRITICAL 0 +#define QRK_DTS_ID_TP_HOT 1 #define QRK_DTS_SAFE_TP_THRES 105 /* Thermal Sensor Register Lock */ @@ -104,6 +105,7 @@ struct soc_sensor_entry { u32 store_ptps; u32 store_dts_enable; struct thermal_zone_device *tzone; + struct thermal_trip trips[QRK_MAX_DTS_TRIPS]; }; static struct soc_sensor_entry *soc_dts; @@ -172,9 +174,9 @@ static int soc_dts_disable(struct thermal_zone_device *tzd) return ret; } -static int _get_trip_temp(int trip, int *temp) +static int get_trip_temp(int trip) { - int status; + int status, temp; u32 out; mutex_lock(&dts_update_mutex); @@ -183,7 +185,7 @@ static int _get_trip_temp(int trip, int *temp) mutex_unlock(&dts_update_mutex); if (status) - return status; + return THERMAL_TEMP_INVALID; /* * Thermal Sensor Programmable Trip Point Register has 8-bit @@ -191,21 +193,10 @@ static int _get_trip_temp(int trip, int *temp) * thresholds. The threshold value is always offset by its * temperature base (50 degree Celsius). */ - *temp = (out >> (trip * QRK_DTS_SHIFT_TP)) & QRK_DTS_MASK_TP_THRES; - *temp -= QRK_DTS_TEMP_BASE; + temp = (out >> (trip * QRK_DTS_SHIFT_TP)) & QRK_DTS_MASK_TP_THRES; + temp -= QRK_DTS_TEMP_BASE; - return 0; -} - -static inline int sys_get_trip_temp(struct thermal_zone_device *tzd, - int trip, int *temp) -{ - return _get_trip_temp(trip, temp); -} - -static inline int sys_get_crit_temp(struct thermal_zone_device *tzd, int *temp) -{ - return _get_trip_temp(QRK_DTS_ID_TP_CRITICAL, temp); + return temp; } static int update_trip_temp(struct soc_sensor_entry *aux_entry, @@ -262,17 +253,6 @@ static inline int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, return update_trip_temp(tzd->devdata, trip, temp); } -static int sys_get_trip_type(struct thermal_zone_device *thermal, - int trip, enum thermal_trip_type *type) -{ - if (trip) - *type = THERMAL_TRIP_HOT; - else - *type = THERMAL_TRIP_CRITICAL; - - return 0; -} - static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp) { @@ -315,10 +295,7 @@ static int sys_change_mode(struct thermal_zone_device *tzd, static struct thermal_zone_device_ops tzone_ops = { .get_temp = sys_get_curr_temp, - .get_trip_temp = sys_get_trip_temp, - .get_trip_type = sys_get_trip_type, .set_trip_temp = sys_set_trip_temp, - .get_crit_temp = sys_get_crit_temp, .change_mode = sys_change_mode, }; @@ -385,10 +362,18 @@ static struct soc_sensor_entry *alloc_soc_dts(void) goto err_ret; } - aux_entry->tzone = thermal_zone_device_register("quark_dts", - QRK_MAX_DTS_TRIPS, - wr_mask, - aux_entry, &tzone_ops, NULL, 0, polling_delay); + aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].temperature = get_trip_temp(QRK_DTS_ID_TP_CRITICAL); + aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].type = THERMAL_TRIP_CRITICAL; + + aux_entry->trips[QRK_DTS_ID_TP_HOT].temperature = get_trip_temp(QRK_DTS_ID_TP_HOT); + aux_entry->trips[QRK_DTS_ID_TP_HOT].type = THERMAL_TRIP_HOT; + + aux_entry->tzone = thermal_zone_device_register_with_trips("quark_dts", + aux_entry->trips, + QRK_MAX_DTS_TRIPS, + wr_mask, + aux_entry, &tzone_ops, + NULL, 0, polling_delay); if (IS_ERR(aux_entry->tzone)) { err = PTR_ERR(aux_entry->tzone); goto err_ret; diff --git a/drivers/thermal/thermal_acpi.c b/drivers/thermal/thermal_acpi.c index 671f774a7621..0e5698818f69 100644 --- a/drivers/thermal/thermal_acpi.c +++ b/drivers/thermal/thermal_acpi.c @@ -21,42 +21,11 @@ #define TEMP_MIN_DECIK 2180 #define TEMP_MAX_DECIK 4480 -static int thermal_acpi_trip_init(struct acpi_device *adev, - enum thermal_trip_type type, int id, - struct thermal_trip *trip) +static int thermal_acpi_trip_temp(struct acpi_device *adev, char *obj_name, + int *ret_temp) { unsigned long long temp; acpi_status status; - char obj_name[5]; - - switch (type) { - case THERMAL_TRIP_ACTIVE: - if (id < 0 || id > 9) - return -EINVAL; - - obj_name[1] = 'A'; - obj_name[2] = 'C'; - obj_name[3] = '0' + id; - break; - case THERMAL_TRIP_PASSIVE: - obj_name[1] = 'P'; - obj_name[2] = 'S'; - obj_name[3] = 'V'; - break; - case THERMAL_TRIP_HOT: - obj_name[1] = 'H'; - obj_name[2] = 'O'; - obj_name[3] = 'T'; - break; - case THERMAL_TRIP_CRITICAL: - obj_name[1] = 'C'; - obj_name[2] = 'R'; - obj_name[3] = 'T'; - break; - } - - obj_name[0] = '_'; - obj_name[4] = '\0'; status = acpi_evaluate_integer(adev->handle, obj_name, NULL, &temp); if (ACPI_FAILURE(status)) { @@ -65,87 +34,84 @@ static int thermal_acpi_trip_init(struct acpi_device *adev, } if (temp >= TEMP_MIN_DECIK && temp <= TEMP_MAX_DECIK) { - trip->temperature = deci_kelvin_to_millicelsius(temp); + *ret_temp = deci_kelvin_to_millicelsius(temp); } else { acpi_handle_debug(adev->handle, "%s result %llu out of range\n", obj_name, temp); - trip->temperature = THERMAL_TEMP_INVALID; + *ret_temp = THERMAL_TEMP_INVALID; } - trip->hysteresis = 0; - trip->type = type; - return 0; } /** - * thermal_acpi_trip_active - Get the specified active trip point - * @adev: Thermal zone ACPI device object to get the description from. + * thermal_acpi_active_trip_temp - Retrieve active trip point temperature + * @adev: Target thermal zone ACPI device object. * @id: Active cooling level (0 - 9). - * @trip: Trip point structure to be populated on success. + * @ret_temp: Address to store the retrieved temperature value on success. * * Evaluate the _ACx object for the thermal zone represented by @adev to obtain * the temperature of the active cooling trip point corresponding to the active - * cooling level given by @id and initialize @trip as an active trip point using - * that temperature value. + * cooling level given by @id. * * Return 0 on success or a negative error value on failure. */ -int thermal_acpi_trip_active(struct acpi_device *adev, int id, - struct thermal_trip *trip) +int thermal_acpi_active_trip_temp(struct acpi_device *adev, int id, int *ret_temp) { - return thermal_acpi_trip_init(adev, THERMAL_TRIP_ACTIVE, id, trip); + char obj_name[] = {'_', 'A', 'C', '0' + id, '\0'}; + + if (id < 0 || id > 9) + return -EINVAL; + + return thermal_acpi_trip_temp(adev, obj_name, ret_temp); } -EXPORT_SYMBOL_GPL(thermal_acpi_trip_active); +EXPORT_SYMBOL_GPL(thermal_acpi_active_trip_temp); /** - * thermal_acpi_trip_passive - Get the passive trip point - * @adev: Thermal zone ACPI device object to get the description from. - * @trip: Trip point structure to be populated on success. + * thermal_acpi_passive_trip_temp - Retrieve passive trip point temperature + * @adev: Target thermal zone ACPI device object. + * @ret_temp: Address to store the retrieved temperature value on success. * * Evaluate the _PSV object for the thermal zone represented by @adev to obtain - * the temperature of the passive cooling trip point and initialize @trip as a - * passive trip point using that temperature value. + * the temperature of the passive cooling trip point. * * Return 0 on success or -ENODATA on failure. */ -int thermal_acpi_trip_passive(struct acpi_device *adev, struct thermal_trip *trip) +int thermal_acpi_passive_trip_temp(struct acpi_device *adev, int *ret_temp) { - return thermal_acpi_trip_init(adev, THERMAL_TRIP_PASSIVE, INT_MAX, trip); + return thermal_acpi_trip_temp(adev, "_PSV", ret_temp); } -EXPORT_SYMBOL_GPL(thermal_acpi_trip_passive); +EXPORT_SYMBOL_GPL(thermal_acpi_passive_trip_temp); /** - * thermal_acpi_trip_hot - Get the near critical trip point - * @adev: the ACPI device to get the description from. - * @trip: a &struct thermal_trip to be filled if the function succeed. + * thermal_acpi_hot_trip_temp - Retrieve hot trip point temperature + * @adev: Target thermal zone ACPI device object. + * @ret_temp: Address to store the retrieved temperature value on success. * * Evaluate the _HOT object for the thermal zone represented by @adev to obtain * the temperature of the trip point at which the system is expected to be put - * into the S4 sleep state and initialize @trip as a hot trip point using that - * temperature value. + * into the S4 sleep state. * * Return 0 on success or -ENODATA on failure. */ -int thermal_acpi_trip_hot(struct acpi_device *adev, struct thermal_trip *trip) +int thermal_acpi_hot_trip_temp(struct acpi_device *adev, int *ret_temp) { - return thermal_acpi_trip_init(adev, THERMAL_TRIP_HOT, INT_MAX, trip); + return thermal_acpi_trip_temp(adev, "_HOT", ret_temp); } -EXPORT_SYMBOL_GPL(thermal_acpi_trip_hot); +EXPORT_SYMBOL_GPL(thermal_acpi_hot_trip_temp); /** - * thermal_acpi_trip_critical - Get the critical trip point - * @adev: the ACPI device to get the description from. - * @trip: a &struct thermal_trip to be filled if the function succeed. + * thermal_acpi_critical_trip_temp - Retrieve critical trip point temperature + * @adev: Target thermal zone ACPI device object. + * @ret_temp: Address to store the retrieved temperature value on success. * * Evaluate the _CRT object for the thermal zone represented by @adev to obtain - * the temperature of the critical cooling trip point and initialize @trip as a - * critical trip point using that temperature value. + * the temperature of the critical cooling trip point. * * Return 0 on success or -ENODATA on failure. */ -int thermal_acpi_trip_critical(struct acpi_device *adev, struct thermal_trip *trip) +int thermal_acpi_critical_trip_temp(struct acpi_device *adev, int *ret_temp) { - return thermal_acpi_trip_init(adev, THERMAL_TRIP_CRITICAL, INT_MAX, trip); + return thermal_acpi_trip_temp(adev, "_CRT", ret_temp); } -EXPORT_SYMBOL_GPL(thermal_acpi_trip_critical); +EXPORT_SYMBOL_GPL(thermal_acpi_critical_trip_temp); |