diff options
author | Bryan O'Sullivan <bos@pathscale.com> | 2006-09-28 09:00:18 -0700 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2006-09-28 11:17:03 -0700 |
commit | 89d1e09b6a6d844ef327937f41658a426be42501 (patch) | |
tree | 5730241c737baf67b0b1ddf89ff38f6936d649c4 /drivers/infiniband/hw/ipath/ipath_iba6110.c | |
parent | 510847750c9d26052a71631e0fcad9e7f7a5f369 (diff) | |
download | lwn-89d1e09b6a6d844ef327937f41658a426be42501.tar.gz lwn-89d1e09b6a6d844ef327937f41658a426be42501.zip |
IB/ipath: Fix and recover TXE piobuf and PBC parity errors
We can sometimes trigger parity errors due to processor speculative
reads to our write-combined memory (mostly seen on Woodcrest). Add a
stats counter for these.
Factored out the sendbuffererror buffer cancellation code so it can be
used in the new handling; suppress likely subsequent error messages if
within two jiffies of the cancellation.
Also restore 2 dropped TXE lines on hwe_bitsextant noticed while
debugging.
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/ipath/ipath_iba6110.c')
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_iba6110.c | 32 |
1 files changed, 31 insertions, 1 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index fd49c9c32c68..9e4e8d4c6e20 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -451,7 +451,10 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, * make sure we get this much out, unless told to be quiet, * or it's occurred within the last 5 seconds */ - if ((hwerrs & ~dd->ipath_lasthwerror) || + if ((hwerrs & ~(dd->ipath_lasthwerror | + ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) || (ipath_debug & __IPATH_VERBDBG)) dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx " "(cleared)\n", (unsigned long long) hwerrs); @@ -464,6 +467,33 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); if (ctrl & INFINIPATH_C_FREEZEMODE) { + /* + * parity errors in send memory are recoverable, + * just cancel the send (if indicated in * sendbuffererror), + * count the occurrence, unfreeze (if no other handled + * hardware error bits are set), and continue. They can + * occur if a processor speculative read is done to the PIO + * buffer while we are sending a packet, for example. + */ + if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { + ipath_stats.sps_txeparity++; + ipath_dbg("Recovering from TXE parity error (%llu), " + "hwerrstatus=%llx\n", + (unsigned long long) ipath_stats.sps_txeparity, + (unsigned long long) hwerrs); + ipath_disarm_senderrbufs(dd); + hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT); + if (!hwerrs) { /* else leave in freeze mode */ + ipath_write_kreg(dd, + dd->ipath_kregs->kr_control, + dd->ipath_control); + return; + } + } if (hwerrs) { /* * if any set that we aren't ignoring; only |