diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /drivers/block/aoe/aoecmd.c | |
download | lwn-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.tar.gz lwn-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.zip |
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'drivers/block/aoe/aoecmd.c')
-rw-r--r-- | drivers/block/aoe/aoecmd.c | 629 |
1 files changed, 629 insertions, 0 deletions
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c new file mode 100644 index 000000000000..fb6d942a4565 --- /dev/null +++ b/drivers/block/aoe/aoecmd.c @@ -0,0 +1,629 @@ +/* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */ +/* + * aoecmd.c + * Filesystem request handling methods + */ + +#include <linux/hdreg.h> +#include <linux/blkdev.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include "aoe.h" + +#define TIMERTICK (HZ / 10) +#define MINTIMER (2 * TIMERTICK) +#define MAXTIMER (HZ << 1) +#define MAXWAIT (60 * 3) /* After MAXWAIT seconds, give up and fail dev */ + +static struct sk_buff * +new_skb(struct net_device *if_dev, ulong len) +{ + struct sk_buff *skb; + + skb = alloc_skb(len, GFP_ATOMIC); + if (skb) { + skb->nh.raw = skb->mac.raw = skb->data; + skb->dev = if_dev; + skb->protocol = __constant_htons(ETH_P_AOE); + skb->priority = 0; + skb_put(skb, len); + skb->next = skb->prev = NULL; + + /* tell the network layer not to perform IP checksums + * or to get the NIC to do it + */ + skb->ip_summed = CHECKSUM_NONE; + } + return skb; +} + +static struct sk_buff * +skb_prepare(struct aoedev *d, struct frame *f) +{ + struct sk_buff *skb; + char *p; + + skb = new_skb(d->ifp, f->ndata + f->writedatalen); + if (!skb) { + printk(KERN_INFO "aoe: skb_prepare: failure to allocate skb\n"); + return NULL; + } + + p = skb->mac.raw; + memcpy(p, f->data, f->ndata); + + if (f->writedatalen) { + p += sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr); + memcpy(p, f->bufaddr, f->writedatalen); + } + + return skb; +} + +static struct frame * +getframe(struct aoedev *d, int tag) +{ + struct frame *f, *e; + + f = d->frames; + e = f + d->nframes; + for (; f<e; f++) + if (f->tag == tag) + return f; + return NULL; +} + +/* + * Leave the top bit clear so we have tagspace for userland. + * The bottom 16 bits are the xmit tick for rexmit/rttavg processing. + * This driver reserves tag -1 to mean "unused frame." + */ +static int +newtag(struct aoedev *d) +{ + register ulong n; + + n = jiffies & 0xffff; + return n |= (++d->lasttag & 0x7fff) << 16; +} + +static int +aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h) +{ + u16 type = __constant_cpu_to_be16(ETH_P_AOE); + u16 aoemajor = __cpu_to_be16(d->aoemajor); + u32 host_tag = newtag(d); + u32 tag = __cpu_to_be32(host_tag); + + memcpy(h->src, d->ifp->dev_addr, sizeof h->src); + memcpy(h->dst, d->addr, sizeof h->dst); + memcpy(h->type, &type, sizeof type); + h->verfl = AOE_HVER; + memcpy(h->major, &aoemajor, sizeof aoemajor); + h->minor = d->aoeminor; + h->cmd = AOECMD_ATA; + memcpy(h->tag, &tag, sizeof tag); + + return host_tag; +} + +static void +aoecmd_ata_rw(struct aoedev *d, struct frame *f) +{ + struct aoe_hdr *h; + struct aoe_atahdr *ah; + struct buf *buf; + struct sk_buff *skb; + ulong bcnt; + register sector_t sector; + char writebit, extbit; + + writebit = 0x10; + extbit = 0x4; + + buf = d->inprocess; + + sector = buf->sector; + bcnt = buf->bv_resid; + if (bcnt > MAXATADATA) + bcnt = MAXATADATA; + + /* initialize the headers & frame */ + h = (struct aoe_hdr *) f->data; + ah = (struct aoe_atahdr *) (h+1); + f->ndata = sizeof *h + sizeof *ah; + memset(h, 0, f->ndata); + f->tag = aoehdr_atainit(d, h); + f->waited = 0; + f->buf = buf; + f->bufaddr = buf->bufaddr; + + /* set up ata header */ + ah->scnt = bcnt >> 9; + ah->lba0 = sector; + ah->lba1 = sector >>= 8; + ah->lba2 = sector >>= 8; + ah->lba3 = sector >>= 8; + if (d->flags & DEVFL_EXT) { + ah->aflags |= AOEAFL_EXT; + ah->lba4 = sector >>= 8; + ah->lba5 = sector >>= 8; + } else { + extbit = 0; + ah->lba3 &= 0x0f; + ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */ + } + + if (bio_data_dir(buf->bio) == WRITE) { + ah->aflags |= AOEAFL_WRITE; + f->writedatalen = bcnt; + } else { + writebit = 0; + f->writedatalen = 0; + } + + ah->cmdstat = WIN_READ | writebit | extbit; + + /* mark all tracking fields and load out */ + buf->nframesout += 1; + buf->bufaddr += bcnt; + buf->bv_resid -= bcnt; +/* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */ + buf->resid -= bcnt; + buf->sector += bcnt >> 9; + if (buf->resid == 0) { + d->inprocess = NULL; + } else if (buf->bv_resid == 0) { + buf->bv++; + buf->bv_resid = buf->bv->bv_len; + buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset; + } + + skb = skb_prepare(d, f); + if (skb) { + skb->next = d->skblist; + d->skblist = skb; + } +} + +/* enters with d->lock held */ +void +aoecmd_work(struct aoedev *d) +{ + struct frame *f; + struct buf *buf; +loop: + f = getframe(d, FREETAG); + if (f == NULL) + return; + if (d->inprocess == NULL) { + if (list_empty(&d->bufq)) + return; + buf = container_of(d->bufq.next, struct buf, bufs); + list_del(d->bufq.next); +/*printk(KERN_INFO "aoecmd_work: bi_size=%ld\n", buf->bio->bi_size); */ + d->inprocess = buf; + } + aoecmd_ata_rw(d, f); + goto loop; +} + +static void +rexmit(struct aoedev *d, struct frame *f) +{ + struct sk_buff *skb; + struct aoe_hdr *h; + char buf[128]; + u32 n; + u32 net_tag; + + n = newtag(d); + + snprintf(buf, sizeof buf, + "%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n", + "retransmit", + d->aoemajor, d->aoeminor, f->tag, jiffies, n); + aoechr_error(buf); + + h = (struct aoe_hdr *) f->data; + f->tag = n; + net_tag = __cpu_to_be32(n); + memcpy(h->tag, &net_tag, sizeof net_tag); + + skb = skb_prepare(d, f); + if (skb) { + skb->next = d->skblist; + d->skblist = skb; + } +} + +static int +tsince(int tag) +{ + int n; + + n = jiffies & 0xffff; + n -= tag & 0xffff; + if (n < 0) + n += 1<<16; + return n; +} + +static void +rexmit_timer(ulong vp) +{ + struct aoedev *d; + struct frame *f, *e; + struct sk_buff *sl; + register long timeout; + ulong flags, n; + + d = (struct aoedev *) vp; + sl = NULL; + + /* timeout is always ~150% of the moving average */ + timeout = d->rttavg; + timeout += timeout >> 1; + + spin_lock_irqsave(&d->lock, flags); + + if (d->flags & DEVFL_TKILL) { +tdie: spin_unlock_irqrestore(&d->lock, flags); + return; + } + f = d->frames; + e = f + d->nframes; + for (; f<e; f++) { + if (f->tag != FREETAG && tsince(f->tag) >= timeout) { + n = f->waited += timeout; + n /= HZ; + if (n > MAXWAIT) { /* waited too long. device failure. */ + aoedev_downdev(d); + goto tdie; + } + rexmit(d, f); + } + } + + sl = d->skblist; + d->skblist = NULL; + if (sl) { + n = d->rttavg <<= 1; + if (n > MAXTIMER) + d->rttavg = MAXTIMER; + } + + d->timer.expires = jiffies + TIMERTICK; + add_timer(&d->timer); + + spin_unlock_irqrestore(&d->lock, flags); + + aoenet_xmit(sl); +} + +static void +ataid_complete(struct aoedev *d, unsigned char *id) +{ + u64 ssize; + u16 n; + + /* word 83: command set supported */ + n = __le16_to_cpu(*((u16 *) &id[83<<1])); + + /* word 86: command set/feature enabled */ + n |= __le16_to_cpu(*((u16 *) &id[86<<1])); + + if (n & (1<<10)) { /* bit 10: LBA 48 */ + d->flags |= DEVFL_EXT; + + /* word 100: number lba48 sectors */ + ssize = __le64_to_cpu(*((u64 *) &id[100<<1])); + + /* set as in ide-disk.c:init_idedisk_capacity */ + d->geo.cylinders = ssize; + d->geo.cylinders /= (255 * 63); + d->geo.heads = 255; + d->geo.sectors = 63; + } else { + d->flags &= ~DEVFL_EXT; + + /* number lba28 sectors */ + ssize = __le32_to_cpu(*((u32 *) &id[60<<1])); + + /* NOTE: obsolete in ATA 6 */ + d->geo.cylinders = __le16_to_cpu(*((u16 *) &id[54<<1])); + d->geo.heads = __le16_to_cpu(*((u16 *) &id[55<<1])); + d->geo.sectors = __le16_to_cpu(*((u16 *) &id[56<<1])); + } + d->ssize = ssize; + d->geo.start = 0; + if (d->gd != NULL) { + d->gd->capacity = ssize; + d->flags |= DEVFL_UP; + return; + } + if (d->flags & DEVFL_WORKON) { + printk(KERN_INFO "aoe: ataid_complete: can't schedule work, it's already on! " + "(This really shouldn't happen).\n"); + return; + } + INIT_WORK(&d->work, aoeblk_gdalloc, d); + schedule_work(&d->work); + d->flags |= DEVFL_WORKON; +} + +static void +calc_rttavg(struct aoedev *d, int rtt) +{ + register long n; + + n = rtt; + if (n < MINTIMER) + n = MINTIMER; + else if (n > MAXTIMER) + n = MAXTIMER; + + /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */ + n -= d->rttavg; + d->rttavg += n >> 2; +} + +void +aoecmd_ata_rsp(struct sk_buff *skb) +{ + struct aoedev *d; + struct aoe_hdr *hin; + struct aoe_atahdr *ahin, *ahout; + struct frame *f; + struct buf *buf; + struct sk_buff *sl; + register long n; + ulong flags; + char ebuf[128]; + + hin = (struct aoe_hdr *) skb->mac.raw; + d = aoedev_bymac(hin->src); + if (d == NULL) { + snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response " + "for unknown device %d.%d\n", + __be16_to_cpu(*((u16 *) hin->major)), + hin->minor); + aoechr_error(ebuf); + return; + } + + spin_lock_irqsave(&d->lock, flags); + + f = getframe(d, __be32_to_cpu(*((u32 *) hin->tag))); + if (f == NULL) { + spin_unlock_irqrestore(&d->lock, flags); + snprintf(ebuf, sizeof ebuf, + "%15s e%d.%d tag=%08x@%08lx\n", + "unexpected rsp", + __be16_to_cpu(*((u16 *) hin->major)), + hin->minor, + __be32_to_cpu(*((u32 *) hin->tag)), + jiffies); + aoechr_error(ebuf); + return; + } + + calc_rttavg(d, tsince(f->tag)); + + ahin = (struct aoe_atahdr *) (hin+1); + ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr)); + buf = f->buf; + + if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */ + printk(KERN_CRIT "aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh " + "stat=%2.2Xh from e%ld.%ld\n", + ahout->cmdstat, ahin->cmdstat, + d->aoemajor, d->aoeminor); + if (buf) + buf->flags |= BUFFL_FAIL; + } else { + switch (ahout->cmdstat) { + case WIN_READ: + case WIN_READ_EXT: + n = ahout->scnt << 9; + if (skb->len - sizeof *hin - sizeof *ahin < n) { + printk(KERN_CRIT "aoe: aoecmd_ata_rsp: runt " + "ata data size in read. skb->len=%d\n", + skb->len); + /* fail frame f? just returning will rexmit. */ + spin_unlock_irqrestore(&d->lock, flags); + return; + } + memcpy(f->bufaddr, ahin+1, n); + case WIN_WRITE: + case WIN_WRITE_EXT: + break; + case WIN_IDENTIFY: + if (skb->len - sizeof *hin - sizeof *ahin < 512) { + printk(KERN_INFO "aoe: aoecmd_ata_rsp: runt data size " + "in ataid. skb->len=%d\n", skb->len); + spin_unlock_irqrestore(&d->lock, flags); + return; + } + ataid_complete(d, (char *) (ahin+1)); + /* d->flags |= DEVFL_WC_UPDATE; */ + break; + default: + printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized " + "outbound ata command %2.2Xh for %d.%d\n", + ahout->cmdstat, + __be16_to_cpu(*((u16 *) hin->major)), + hin->minor); + } + } + + if (buf) { + buf->nframesout -= 1; + if (buf->nframesout == 0 && buf->resid == 0) { + n = (buf->flags & BUFFL_FAIL) ? -EIO : 0; + bio_endio(buf->bio, buf->bio->bi_size, n); + mempool_free(buf, d->bufpool); + } + } + + f->buf = NULL; + f->tag = FREETAG; + + aoecmd_work(d); + + sl = d->skblist; + d->skblist = NULL; + + spin_unlock_irqrestore(&d->lock, flags); + + aoenet_xmit(sl); +} + +void +aoecmd_cfg(ushort aoemajor, unsigned char aoeminor) +{ + struct aoe_hdr *h; + struct aoe_cfghdr *ch; + struct sk_buff *skb, *sl; + struct net_device *ifp; + u16 aoe_type = __constant_cpu_to_be16(ETH_P_AOE); + u16 net_aoemajor = __cpu_to_be16(aoemajor); + + sl = NULL; + + read_lock(&dev_base_lock); + for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) { + dev_hold(ifp); + if (!is_aoe_netif(ifp)) + continue; + + skb = new_skb(ifp, sizeof *h + sizeof *ch); + if (skb == NULL) { + printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n"); + continue; + } + h = (struct aoe_hdr *) skb->mac.raw; + memset(h, 0, sizeof *h + sizeof *ch); + + memset(h->dst, 0xff, sizeof h->dst); + memcpy(h->src, ifp->dev_addr, sizeof h->src); + memcpy(h->type, &aoe_type, sizeof aoe_type); + h->verfl = AOE_HVER; + memcpy(h->major, &net_aoemajor, sizeof net_aoemajor); + h->minor = aoeminor; + h->cmd = AOECMD_CFG; + + skb->next = sl; + sl = skb; + } + read_unlock(&dev_base_lock); + + aoenet_xmit(sl); +} + +/* + * Since we only call this in one place (and it only prepares one frame) + * we just return the skb. Usually we'd chain it up to the d->skblist. + */ +static struct sk_buff * +aoecmd_ata_id(struct aoedev *d) +{ + struct aoe_hdr *h; + struct aoe_atahdr *ah; + struct frame *f; + struct sk_buff *skb; + + f = getframe(d, FREETAG); + if (f == NULL) { + printk(KERN_CRIT "aoe: aoecmd_ata_id: can't get a frame. " + "This shouldn't happen.\n"); + return NULL; + } + + /* initialize the headers & frame */ + h = (struct aoe_hdr *) f->data; + ah = (struct aoe_atahdr *) (h+1); + f->ndata = sizeof *h + sizeof *ah; + memset(h, 0, f->ndata); + f->tag = aoehdr_atainit(d, h); + f->waited = 0; + f->writedatalen = 0; + + /* this message initializes the device, so we reset the rttavg */ + d->rttavg = MAXTIMER; + + /* set up ata header */ + ah->scnt = 1; + ah->cmdstat = WIN_IDENTIFY; + ah->lba3 = 0xa0; + + skb = skb_prepare(d, f); + + /* we now want to start the rexmit tracking */ + d->flags &= ~DEVFL_TKILL; + d->timer.data = (ulong) d; + d->timer.function = rexmit_timer; + d->timer.expires = jiffies + TIMERTICK; + add_timer(&d->timer); + + return skb; +} + +void +aoecmd_cfg_rsp(struct sk_buff *skb) +{ + struct aoedev *d; + struct aoe_hdr *h; + struct aoe_cfghdr *ch; + ulong flags, bufcnt, sysminor, aoemajor; + struct sk_buff *sl; + enum { MAXFRAMES = 8, MAXSYSMINOR = 255 }; + + h = (struct aoe_hdr *) skb->mac.raw; + ch = (struct aoe_cfghdr *) (h+1); + + /* + * Enough people have their dip switches set backwards to + * warrant a loud message for this special case. + */ + aoemajor = __be16_to_cpu(*((u16 *) h->major)); + if (aoemajor == 0xfff) { + printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf " + "address is all ones. Check shelf dip switches\n"); + return; + } + + sysminor = SYSMINOR(aoemajor, h->minor); + if (sysminor > MAXSYSMINOR) { + printk(KERN_INFO "aoe: aoecmd_cfg_rsp: sysminor %ld too " + "large\n", sysminor); + return; + } + + bufcnt = __be16_to_cpu(*((u16 *) ch->bufcnt)); + if (bufcnt > MAXFRAMES) /* keep it reasonable */ + bufcnt = MAXFRAMES; + + d = aoedev_set(sysminor, h->src, skb->dev, bufcnt); + if (d == NULL) { + printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device set failure\n"); + return; + } + + spin_lock_irqsave(&d->lock, flags); + + if (d->flags & (DEVFL_UP | DEVFL_CLOSEWAIT)) { + spin_unlock_irqrestore(&d->lock, flags); + return; + } + + d->fw_ver = __be16_to_cpu(*((u16 *) ch->fwver)); + + /* we get here only if the device is new */ + sl = aoecmd_ata_id(d); + + spin_unlock_irqrestore(&d->lock, flags); + + aoenet_xmit(sl); +} + |