summaryrefslogtreecommitdiffstats
path: root/drivers/scsi/aacraid/commsup.c
diff options
context:
space:
mode:
authorPenchala Narasimha Reddy Chilakala, ERS-HCLTech <narasimhareddyc@hcl.in>2009-12-21 18:39:27 +0530
committerJames Bottomley <James.Bottomley@suse.de>2010-01-17 12:16:17 -0600
commitcacb6dc3d7fea751879a225c15e48228415e6359 (patch)
treef0d1b3792febab8910274e15f0076053a825e392 /drivers/scsi/aacraid/commsup.c
parente6622df3bb1a8e1135f4b84928e24d4c6802f6b5 (diff)
downloadop-kernel-dev-cacb6dc3d7fea751879a225c15e48228415e6359.zip
op-kernel-dev-cacb6dc3d7fea751879a225c15e48228415e6359.tar.gz
[SCSI] aacraid: fix File System going into read-only mode
These particular problems were reported by Cisco, SAP, and other customers as well. Cisco reported them on RHEL4 U6, and SAP reported them on SLES9 SP4 and SLES10 SP2.

We added these fixes on RHEL4 U6 and gave a private build to IBM and Cisco. Cisco and IBM tested it for more than 15 days and reported that they have not seen the issue so far. Before the fix, Cisco used to see the issue within 5 days. We generated a patch for SLES9 SP4 and SLES10 SP2 and submitted it to Novell. Novell applied the patch and gave a test build to SAP. SAP tested it and reported that the build is working properly. We also tested in our lab using the tool "dishogsync", an I/O stress tool provided by Cisco.

Issue1: File System going into read-only mode

Root cause: The driver tends to not free the memory (FIB) when the management request exits prematurely. The accumulation of such un-freed memory causes the driver to fail to allocate any more memory (FIB) and hence return the value 0x70000 to the upper layer, which puts the file system into read-only mode.

Fix details: The fix makes sure to free the memory (FIB) even if the request exits prematurely, hence ensuring the driver does not run out of memory (FIBs).

Issue2: False RAID Alert occurs

Physical Drives and Logical Drives are reported as deleted or added, even though no change was made on the system.

Root cause: Driver IOCTLs are signaled with EINTR while waiting on a response from the lower layers. Returning "EINTR" will never initiate an internal retry.

Fix details: The issue was fixed by replacing "EINTR" with "ERESTARTSYS" for mid-layer retries.

Signed-off-by: Penchala Narasimha Reddy <ServeRAIDDriver@hcl.in>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
Diffstat (limited to 'drivers/scsi/aacraid/commsup.c')
-rw-r--r--drivers/scsi/aacraid/commsup.c72
1 files changed, 61 insertions, 11 deletions
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index 956261f..94d2954 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -189,7 +189,14 @@ struct fib *aac_fib_alloc(struct aac_dev *dev)
void aac_fib_free(struct fib *fibptr)
{
- unsigned long flags;
+ unsigned long flags, flagsv;
+
+ spin_lock_irqsave(&fibptr->event_lock, flagsv);
+ if (fibptr->done == 2) {
+ spin_unlock_irqrestore(&fibptr->event_lock, flagsv);
+ return;
+ }
+ spin_unlock_irqrestore(&fibptr->event_lock, flagsv);
spin_lock_irqsave(&fibptr->dev->fib_lock, flags);
if (unlikely(fibptr->flags & FIB_CONTEXT_FLAG_TIMED_OUT))
@@ -390,6 +397,8 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size,
struct hw_fib * hw_fib = fibptr->hw_fib_va;
unsigned long flags = 0;
unsigned long qflags;
+ unsigned long mflags = 0;
+
if (!(hw_fib->header.XferState & cpu_to_le32(HostOwned)))
return -EBUSY;
@@ -471,9 +480,31 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size,
if (!dev->queues)
return -EBUSY;
- if(wait)
+ if (wait) {
+
+ spin_lock_irqsave(&dev->manage_lock, mflags);
+ if (dev->management_fib_count >= AAC_NUM_MGT_FIB) {
+ printk(KERN_INFO "No management Fibs Available:%d\n",
+ dev->management_fib_count);
+ spin_unlock_irqrestore(&dev->manage_lock, mflags);
+ return -EBUSY;
+ }
+ dev->management_fib_count++;
+ spin_unlock_irqrestore(&dev->manage_lock, mflags);
spin_lock_irqsave(&fibptr->event_lock, flags);
- aac_adapter_deliver(fibptr);
+ }
+
+ if (aac_adapter_deliver(fibptr) != 0) {
+ printk(KERN_ERR "aac_fib_send: returned -EBUSY\n");
+ if (wait) {
+ spin_unlock_irqrestore(&fibptr->event_lock, flags);
+ spin_lock_irqsave(&dev->manage_lock, mflags);
+ dev->management_fib_count--;
+ spin_unlock_irqrestore(&dev->manage_lock, mflags);
+ }
+ return -EBUSY;
+ }
+
/*
* If the caller wanted us to wait for response wait now.
@@ -516,14 +547,15 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size,
udelay(5);
}
} else if (down_interruptible(&fibptr->event_wait)) {
- fibptr->done = 2;
- up(&fibptr->event_wait);
+ /* Do nothing ... satisfy
+ * down_interruptible must_check */
}
+
spin_lock_irqsave(&fibptr->event_lock, flags);
- if ((fibptr->done == 0) || (fibptr->done == 2)) {
+ if (fibptr->done == 0) {
fibptr->done = 2; /* Tell interrupt we aborted */
spin_unlock_irqrestore(&fibptr->event_lock, flags);
- return -EINTR;
+ return -ERESTARTSYS;
}
spin_unlock_irqrestore(&fibptr->event_lock, flags);
BUG_ON(fibptr->done == 0);
@@ -689,6 +721,7 @@ int aac_fib_adapter_complete(struct fib *fibptr, unsigned short size)
int aac_fib_complete(struct fib *fibptr)
{
+ unsigned long flags;
struct hw_fib * hw_fib = fibptr->hw_fib_va;
/*
@@ -709,6 +742,13 @@ int aac_fib_complete(struct fib *fibptr)
* command is complete that we had sent to the adapter and this
* cdb could be reused.
*/
+ spin_lock_irqsave(&fibptr->event_lock, flags);
+ if (fibptr->done == 2) {
+ spin_unlock_irqrestore(&fibptr->event_lock, flags);
+ return 0;
+ }
+ spin_unlock_irqrestore(&fibptr->event_lock, flags);
+
if((hw_fib->header.XferState & cpu_to_le32(SentFromHost)) &&
(hw_fib->header.XferState & cpu_to_le32(AdapterProcessed)))
{
@@ -1355,7 +1395,10 @@ int aac_reset_adapter(struct aac_dev * aac, int forced)
if (status >= 0)
aac_fib_complete(fibctx);
- aac_fib_free(fibctx);
+ /* FIB should be freed only after getting
+ * the response from the F/W */
+ if (status != -ERESTARTSYS)
+ aac_fib_free(fibctx);
}
}
@@ -1759,6 +1802,7 @@ int aac_command_thread(void *data)
struct fib *fibptr;
if ((fibptr = aac_fib_alloc(dev))) {
+ int status;
__le32 *info;
aac_fib_init(fibptr);
@@ -1769,15 +1813,21 @@ int aac_command_thread(void *data)
*info = cpu_to_le32(now.tv_sec);
- (void)aac_fib_send(SendHostTime,
+ status = aac_fib_send(SendHostTime,
fibptr,
sizeof(*info),
FsaNormal,
1, 1,
NULL,
NULL);
- aac_fib_complete(fibptr);
- aac_fib_free(fibptr);
+ /* Do not set XferState to zero unless
+ * receives a response from F/W */
+ if (status >= 0)
+ aac_fib_complete(fibptr);
+ /* FIB should be freed only after
+ * getting the response from the F/W */
+ if (status != -ERESTARTSYS)
+ aac_fib_free(fibptr);
}
difference = (long)(unsigned)update_interval*HZ;
} else {
OpenPOWER on IntegriCloud