summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Hutchings <bhutchings@solarflare.com>2010-12-02 13:46:24 +0000
committerDavid S. Miller <davem@davemloft.net>2010-12-03 09:08:03 -0800
commit71839f7d162f973f5931d30d1376a2dc5c0bed5a (patch)
tree0187d9996d6a2d4945acf1eaafa0f062935e4621
parent4484cd7dedecf59aee0775c6658f95bdee65f277 (diff)
downloadlwn-71839f7d162f973f5931d30d1376a2dc5c0bed5a.tar.gz
lwn-71839f7d162f973f5931d30d1376a2dc5c0bed5a.zip
sfc: Distinguish critical and non-critical over-temperature conditions
Set both the 'maximum' and critical temperature limits for LM87 hardware monitors on Falcon boards. Do not shut down a port until the critical temperature is reached, but warn as soon as the 'maximum' temperature is reached. Signed-off-by: Ben Hutchings <bhutchings@solarflare.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/sfc/falcon_boards.c109
1 files changed, 80 insertions, 29 deletions
diff --git a/drivers/net/sfc/falcon_boards.c b/drivers/net/sfc/falcon_boards.c
index cfc29d767588..86180ee02ec0 100644
--- a/drivers/net/sfc/falcon_boards.c
+++ b/drivers/net/sfc/falcon_boards.c
@@ -30,17 +30,28 @@
#define FALCON_BOARD_SFN4112F 0x52
/* Board temperature is about 15°C above ambient when air flow is
- * limited. */
+ * limited. The maximum acceptable ambient temperature varies
+ * depending on the PHY specifications but the critical temperature
+ * above which we should shut down to avoid damage is 80°C. */
#define FALCON_BOARD_TEMP_BIAS 15
+#define FALCON_BOARD_TEMP_CRIT (80 + FALCON_BOARD_TEMP_BIAS)
/* SFC4000 datasheet says: 'The maximum permitted junction temperature
* is 125°C; the thermal design of the environment for the SFC4000
* should aim to keep this well below 100°C.' */
+#define FALCON_JUNC_TEMP_MIN 0
#define FALCON_JUNC_TEMP_MAX 90
+#define FALCON_JUNC_TEMP_CRIT 125
/*****************************************************************************
* Support for LM87 sensor chip used on several boards
*/
+#define LM87_REG_TEMP_HW_INT_LOCK 0x13
+#define LM87_REG_TEMP_HW_EXT_LOCK 0x14
+#define LM87_REG_TEMP_HW_INT 0x17
+#define LM87_REG_TEMP_HW_EXT 0x18
+#define LM87_REG_TEMP_EXT1 0x26
+#define LM87_REG_TEMP_INT 0x27
#define LM87_REG_ALARMS1 0x41
#define LM87_REG_ALARMS2 0x42
#define LM87_IN_LIMITS(nr, _min, _max) \
@@ -57,6 +68,27 @@
#if defined(CONFIG_SENSORS_LM87) || defined(CONFIG_SENSORS_LM87_MODULE)
+static int efx_poke_lm87(struct i2c_client *client, const u8 *reg_values)
+{
+ while (*reg_values) {
+ u8 reg = *reg_values++;
+ u8 value = *reg_values++;
+ int rc = i2c_smbus_write_byte_data(client, reg, value);
+ if (rc)
+ return rc;
+ }
+ return 0;
+}
+
+static const u8 falcon_lm87_common_regs[] = {
+ LM87_REG_TEMP_HW_INT_LOCK, FALCON_BOARD_TEMP_CRIT,
+ LM87_REG_TEMP_HW_INT, FALCON_BOARD_TEMP_CRIT,
+ LM87_TEMP_EXT1_LIMITS(FALCON_JUNC_TEMP_MIN, FALCON_JUNC_TEMP_MAX),
+ LM87_REG_TEMP_HW_EXT_LOCK, FALCON_JUNC_TEMP_CRIT,
+ LM87_REG_TEMP_HW_EXT, FALCON_JUNC_TEMP_CRIT,
+ 0
+};
+
static int efx_init_lm87(struct efx_nic *efx, struct i2c_board_info *info,
const u8 *reg_values)
{
@@ -67,13 +99,12 @@ static int efx_init_lm87(struct efx_nic *efx, struct i2c_board_info *info,
if (!client)
return -EIO;
- while (*reg_values) {
- u8 reg = *reg_values++;
- u8 value = *reg_values++;
- rc = i2c_smbus_write_byte_data(client, reg, value);
- if (rc)
- goto err;
- }
+ rc = efx_poke_lm87(client, reg_values);
+ if (rc)
+ goto err;
+ rc = efx_poke_lm87(client, falcon_lm87_common_regs);
+ if (rc)
+ goto err;
board->hwmon_client = client;
return 0;
@@ -91,36 +122,56 @@ static void efx_fini_lm87(struct efx_nic *efx)
static int efx_check_lm87(struct efx_nic *efx, unsigned mask)
{
struct i2c_client *client = falcon_board(efx)->hwmon_client;
- s32 alarms1, alarms2;
+ bool temp_crit, elec_fault, is_failure;
+ u16 alarms;
+ s32 reg;
/* If link is up then do not monitor temperature */
if (EFX_WORKAROUND_7884(efx) && efx->link_state.up)
return 0;
- alarms1 = i2c_smbus_read_byte_data(client, LM87_REG_ALARMS1);
- alarms2 = i2c_smbus_read_byte_data(client, LM87_REG_ALARMS2);
- if (alarms1 < 0)
- return alarms1;
- if (alarms2 < 0)
- return alarms2;
- alarms1 &= mask;
- alarms2 &= mask >> 8;
- if (alarms1 || alarms2) {
+ reg = i2c_smbus_read_byte_data(client, LM87_REG_ALARMS1);
+ if (reg < 0)
+ return reg;
+ alarms = reg;
+ reg = i2c_smbus_read_byte_data(client, LM87_REG_ALARMS2);
+ if (reg < 0)
+ return reg;
+ alarms |= reg << 8;
+ alarms &= mask;
+
+ temp_crit = false;
+ if (alarms & LM87_ALARM_TEMP_INT) {
+ reg = i2c_smbus_read_byte_data(client, LM87_REG_TEMP_INT);
+ if (reg < 0)
+ return reg;
+ if (reg > FALCON_BOARD_TEMP_CRIT)
+ temp_crit = true;
+ }
+ if (alarms & LM87_ALARM_TEMP_EXT1) {
+ reg = i2c_smbus_read_byte_data(client, LM87_REG_TEMP_EXT1);
+ if (reg < 0)
+ return reg;
+ if (reg > FALCON_JUNC_TEMP_CRIT)
+ temp_crit = true;
+ }
+ elec_fault = alarms & ~(LM87_ALARM_TEMP_INT | LM87_ALARM_TEMP_EXT1);
+ is_failure = temp_crit || elec_fault;
+
+ if (alarms)
netif_err(efx, hw, efx->net_dev,
- "LM87 detected a hardware failure (status %02x:%02x)"
- "%s%s%s\n",
- alarms1, alarms2,
- (alarms1 & LM87_ALARM_TEMP_INT) ?
+ "LM87 detected a hardware %s (status %02x:%02x)"
+ "%s%s%s%s\n",
+ is_failure ? "failure" : "problem",
+ alarms & 0xff, alarms >> 8,
+ (alarms & LM87_ALARM_TEMP_INT) ?
"; board is overheating" : "",
- (alarms1 & LM87_ALARM_TEMP_EXT1) ?
+ (alarms & LM87_ALARM_TEMP_EXT1) ?
"; controller is overheating" : "",
- (alarms1 & ~(LM87_ALARM_TEMP_INT | LM87_ALARM_TEMP_EXT1)
- || alarms2) ?
- "; electrical fault" : "");
- return -ERANGE;
- }
+ temp_crit ? "; reached critical temperature" : "",
+ elec_fault ? "; electrical fault" : "");
- return 0;
+ return is_failure ? -ERANGE : 0;
}
#else /* !CONFIG_SENSORS_LM87 */