diff options
author | Penchala Narasimha Reddy Chilakala, ERS-HCLTech <narasimhareddyc@hcl.in> | 2009-12-21 18:39:27 +0530 |
---|---|---|
committer | James Bottomley <James.Bottomley@suse.de> | 2010-01-17 12:16:17 -0600 |
commit | cacb6dc3d7fea751879a225c15e48228415e6359 (patch) | |
tree | f0d1b3792febab8910274e15f0076053a825e392 /drivers/scsi/aacraid/aachba.c | |
parent | e6622df3bb1a8e1135f4b84928e24d4c6802f6b5 (diff) | |
download | lwn-cacb6dc3d7fea751879a225c15e48228415e6359.tar.gz lwn-cacb6dc3d7fea751879a225c15e48228415e6359.zip |
[SCSI] aacraid: fix File System going into read-only mode
These particular problems were reported by Cisco and SAP and customers
as well. Cisco reported on RHEL4 U6 and SAP reported on SLES9 SP4 and
SLES10 SP2. We added these fixes on RHEL4 U6 and gave a private build
to IBM and Cisco. Cisco and IBM tested it for more than 15 days and
they reported that they did not see the issue so far. Before the fix,
Cisco used to see the issue within 5 days. We generated a patch for
SLES9 SP4 and SLES10 SP2 and submitted to Novell. Novell applied the
patch and gave a test build to SAP. SAP tested and reported that the
build is working properly.
We also tested in our lab using the tools "dishogsync", which is IO
stress tool and the tool was provided by Cisco.
Issue1: File System going into read-only mode
Root cause: The driver tends to not free the memory (FIB) when the
management request exits prematurely. The accumulation of such
un-freed memory causes the driver to fail to allocate anymore memory
(FIB) and hence return 0x70000 value to the upper layer, which puts
the file system into read only mode.
Fix details: The fix makes sure to free the memory (FIB) even if the
request exits prematurely hence ensuring the driver wouldn't run out
of memory (FIBs).
Issue2: False Raid Alert occurs
When the Physical Drives and Logical drives are reported as deleted or
added, even though there is no change done on the system
Root cause: Driver IOCTLs is signaled with EINTR while waiting on
response from the lower layers. Returning "EINTR" will never initiate
internal retry.
Fix details: The issue was fixed by replacing "EINTR" with
"ERESTARTSYS" for mid-layer retries.
Signed-off-by: Penchala Narasimha Reddy <ServeRAIDDriver@hcl.in>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
Diffstat (limited to 'drivers/scsi/aacraid/aachba.c')
-rw-r--r-- | drivers/scsi/aacraid/aachba.c | 52 |
1 files changed, 40 insertions, 12 deletions
diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index 2a889853a106..7e26ebc26661 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -293,7 +293,10 @@ int aac_get_config_status(struct aac_dev *dev, int commit_flag) status = -EINVAL; } } - aac_fib_complete(fibptr); + /* Do not set XferState to zero unless receives a response from F/W */ + if (status >= 0) + aac_fib_complete(fibptr); + /* Send a CT_COMMIT_CONFIG to enable discovery of devices */ if (status >= 0) { if ((aac_commit == 1) || commit_flag) { @@ -310,13 +313,18 @@ int aac_get_config_status(struct aac_dev *dev, int commit_flag) FsaNormal, 1, 1, NULL, NULL); - aac_fib_complete(fibptr); + /* Do not set XferState to zero unless + * receives a response from F/W */ + if (status >= 0) + aac_fib_complete(fibptr); } else if (aac_commit == 0) { printk(KERN_WARNING "aac_get_config_status: Foreign device configurations are being ignored\n"); } } - aac_fib_free(fibptr); + /* FIB should be freed only after getting the response from the F/W */ + if (status != -ERESTARTSYS) + aac_fib_free(fibptr); return status; } @@ -355,7 +363,9 @@ int aac_get_containers(struct aac_dev *dev) maximum_num_containers = le32_to_cpu(dresp->ContainerSwitchEntries); aac_fib_complete(fibptr); } - aac_fib_free(fibptr); + /* FIB should be freed only after getting the response from the F/W */ + if (status != -ERESTARTSYS) + aac_fib_free(fibptr); if (maximum_num_containers < MAXIMUM_NUM_CONTAINERS) maximum_num_containers = MAXIMUM_NUM_CONTAINERS; @@ -1245,8 +1255,12 @@ int aac_get_adapter_info(struct aac_dev* dev) NULL); if (rcode < 0) { - aac_fib_complete(fibptr); - aac_fib_free(fibptr); + /* FIB should be freed only after + * getting the response from the F/W */ + if (rcode != -ERESTARTSYS) { + aac_fib_complete(fibptr); + aac_fib_free(fibptr); + } return rcode; } memcpy(&dev->adapter_info, info, sizeof(*info)); @@ -1270,6 +1284,12 @@ int aac_get_adapter_info(struct aac_dev* dev) if (rcode >= 0) memcpy(&dev->supplement_adapter_info, sinfo, sizeof(*sinfo)); + if (rcode == -ERESTARTSYS) { + fibptr = aac_fib_alloc(dev); + if (!fibptr) + return -ENOMEM; + } + } @@ -1470,9 +1490,11 @@ int aac_get_adapter_info(struct aac_dev* dev) (dev->scsi_host_ptr->sg_tablesize * 8) + 112; } } - - aac_fib_complete(fibptr); - aac_fib_free(fibptr); + /* FIB should be freed only after getting the response from the F/W */ + if (rcode != -ERESTARTSYS) { + aac_fib_complete(fibptr); + aac_fib_free(fibptr); + } return rcode; } @@ -1633,6 +1655,7 @@ static int aac_read(struct scsi_cmnd * scsicmd) * Alocate and initialize a Fib */ if (!(cmd_fibcontext = aac_fib_alloc(dev))) { + printk(KERN_WARNING "aac_read: fib allocation failed\n"); return -1; } @@ -1712,9 +1735,14 @@ static int aac_write(struct scsi_cmnd * scsicmd) * Allocate and initialize a Fib then setup a BlockWrite command */ if (!(cmd_fibcontext = aac_fib_alloc(dev))) { - scsicmd->result = DID_ERROR << 16; - scsicmd->scsi_done(scsicmd); - return 0; + /* FIB temporarily unavailable,not catastrophic failure */ + + /* scsicmd->result = DID_ERROR << 16; + * scsicmd->scsi_done(scsicmd); + * return 0; + */ + printk(KERN_WARNING "aac_write: fib allocation failed\n"); + return -1; } status = aac_adapter_write(cmd_fibcontext, scsicmd, lba, count, fua); |