LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH 02/12] handle multiple network paths to AoE device
  2007-06-26 18:50 [PATCH 01/12] bring driver version number to 47 Ed L. Cashin
  2007-06-26 18:50 ` [PATCH 03/12] mac_addr: avoid 64-bit arch compiler warnings Ed L. Cashin
@ 2007-06-26 18:50 ` Ed L. Cashin
  2007-07-03  4:29   ` Andrew Morton
  2007-06-26 18:50 ` [PATCH 05/12] eliminate goto and improve readability Ed L. Cashin
                   ` (8 subsequent siblings)
  10 siblings, 1 reply; 28+ messages in thread
From: Ed L. Cashin @ 2007-06-26 18:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: Greg K-H, ecashin

Handle multiple network paths to AoE device.

Signed-off-by: Ed L. Cashin <ecashin@coraid.com>
---
 drivers/block/aoe/aoe.h    |   58 +++--
 drivers/block/aoe/aoeblk.c |   63 ++++-
 drivers/block/aoe/aoechr.c |   14 +-
 drivers/block/aoe/aoecmd.c |  660 +++++++++++++++++++++++++++++--------------
 drivers/block/aoe/aoedev.c |  163 +++++------
 drivers/block/aoe/aoenet.c |    4 +-
 6 files changed, 630 insertions(+), 332 deletions(-)

diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 2ce5ce9..069f04c 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -85,10 +85,8 @@ enum {
 	DEVFL_EXT = (1<<2),	/* device accepts lba48 commands */
 	DEVFL_CLOSEWAIT = (1<<3), /* device is waiting for all closes to revalidate */
 	DEVFL_GDALLOC = (1<<4),	/* need to alloc gendisk */
-	DEVFL_PAUSE = (1<<5),
+	DEVFL_KICKME = (1<<5),	/* slow polling network card catch */
 	DEVFL_NEWSIZE = (1<<6),	/* need to update dev size in block layer */
-	DEVFL_MAXBCNT = (1<<7), /* d->maxbcnt is not changeable */
-	DEVFL_KICKME = (1<<8),
 
 	BUFFL_FAIL = 1,
 };
@@ -97,17 +95,24 @@ enum {
 	DEFAULTBCNT = 2 * 512,	/* 2 sectors */
 	NPERSHELF = 16,		/* number of slots per shelf address */
 	FREETAG = -1,
-	MIN_BUFS = 8,
+	MIN_BUFS = 16,
+	NTARGETS = 8,
+	NAOEIFS = 8,
+
+	TIMERTICK = HZ / 10,
+	MINTIMER = HZ >> 2,
+	MAXTIMER = HZ << 1,
+	HELPWAIT = 20,
 };
 
 struct buf {
 	struct list_head bufs;
-	ulong start_time;	/* for disk stats */
+	ulong stime;	/* for disk stats */
 	ulong flags;
 	ulong nframesout;
-	char *bufaddr;
 	ulong resid;
 	ulong bv_resid;
+	ulong bv_off;
 	sector_t sector;
 	struct bio *bio;
 	struct bio_vec *bv;
@@ -123,19 +128,37 @@ struct frame {
 	struct sk_buff *skb;
 };
 
+struct aoeif {
+	struct net_device *nd;
+	unsigned char lost;
+	unsigned char lostjumbo;
+	ushort maxbcnt;
+};
+
+struct aoetgt {
+	unsigned char addr[6];
+	ushort nframes;
+	struct frame *frames;
+	struct aoeif ifs[NAOEIFS];
+	struct aoeif *ifp;	/* current aoeif in use */
+	ushort nout;
+	ushort maxout;
+	u16 lasttag;		/* last tag sent */
+	u16 useme;
+	ulong lastwadj;		/* last window adjustment */
+int wpkts, rpkts;
+};
+
 struct aoedev {
 	struct aoedev *next;
-	unsigned char addr[6];	/* remote mac addr */
-	ushort flags;
 	ulong sysminor;
 	ulong aoemajor;
-	ulong aoeminor;
+	u16 aoeminor;
+	u16 flags;
 	u16 nopen;		/* (bd_openers isn't available without sleeping) */
-	u16 lasttag;		/* last tag sent */
 	u16 rttavg;		/* round trip average of requests/responses */
 	u16 mintimer;
 	u16 fw_ver;		/* version of blade's firmware */
-	u16 maxbcnt;
 	struct work_struct work;/* disk create work struct */
 	struct gendisk *gd;
 	request_queue_t blkq;
@@ -143,15 +166,15 @@ struct aoedev {
 	sector_t ssize;
 	struct timer_list timer;
 	spinlock_t lock;
-	struct net_device *ifp;	/* interface ed is attached to */
 	struct sk_buff *sendq_hd; /* packets needing to be sent, list head */
 	struct sk_buff *sendq_tl;
 	mempool_t *bufpool;	/* for deadlock-free Buf allocation */
 	struct list_head bufq;	/* queue of bios to work on */
 	struct buf *inprocess;	/* the one we're currently working on */
-	ushort lostjumbo;
-	ushort nframes;		/* number of frames below */
-	struct frame *frames;
+	struct aoetgt *targets[NTARGETS];
+	struct aoetgt **tgt;	/* target in use when working */
+	struct aoetgt **htgt;	/* target needing rexmit assistance */
+//int ios[64];
 };
 
 
@@ -169,12 +192,13 @@ void aoecmd_cfg(ushort aoemajor, unsigned char aoeminor);
 void aoecmd_ata_rsp(struct sk_buff *);
 void aoecmd_cfg_rsp(struct sk_buff *);
 void aoecmd_sleepwork(struct work_struct *);
-struct sk_buff *new_skb(ulong);
+void aoecmd_cleanslate(struct aoedev *);
+struct sk_buff *aoecmd_ata_id(struct aoedev *);
 
 int aoedev_init(void);
 void aoedev_exit(void);
 struct aoedev *aoedev_by_aoeaddr(int maj, int min);
-struct aoedev *aoedev_by_sysminor_m(ulong sysminor, ulong bufcnt);
+struct aoedev *aoedev_by_sysminor_m(ulong sysminor);
 void aoedev_downdev(struct aoedev *d);
 int aoedev_isbusy(struct aoedev *d);
 
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 478489c..f6773ab 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -21,22 +21,55 @@ static ssize_t aoedisk_show_state(struct gendisk * disk, char *page)
 	return snprintf(page, PAGE_SIZE,
 			"%s%s\n",
 			(d->flags & DEVFL_UP) ? "up" : "down",
-			(d->flags & DEVFL_PAUSE) ? ",paused" :
+			(d->flags & DEVFL_KICKME) ? ",kickme" :
 			(d->nopen && !(d->flags & DEVFL_UP)) ? ",closewait" : "");
 	/* I'd rather see nopen exported so we can ditch closewait */
 }
 static ssize_t aoedisk_show_mac(struct gendisk * disk, char *page)
 {
 	struct aoedev *d = disk->private_data;
+	struct aoetgt *t = d->targets[0];
 
+	if (t == NULL)
+		return snprintf(page, PAGE_SIZE, "none\n");
 	return snprintf(page, PAGE_SIZE, "%012llx\n",
-			(unsigned long long)mac_addr(d->addr));
+			(unsigned long long)mac_addr(t->addr));
 }
 static ssize_t aoedisk_show_netif(struct gendisk * disk, char *page)
 {
 	struct aoedev *d = disk->private_data;
+	struct net_device *nds[8], **nd, **nnd, **ne;
+	struct aoetgt **t, **te;
+	struct aoeif *ifp, *e;
+	char *p;
+
+	memset(nds, 0, ARRAY_SIZE(nds));
+	nd = nds;
+	ne = nd + ARRAY_SIZE(nds);
+	t = d->targets;
+	te = t + NTARGETS;
+	for (; t<te && *t; t++) {
+		ifp = (*t)->ifs;
+		e = ifp + NAOEIFS;
+		for (; ifp<e && ifp->nd; ifp++) {
+			for (nnd=nds; nnd<nd; nnd++)
+				if (*nnd == ifp->nd)
+					break;
+			if (nnd == nd)
+			if (nd != ne)
+				*nd++ = ifp->nd;
+		}
+	}
 
-	return snprintf(page, PAGE_SIZE, "%s\n", d->ifp->name);
+	ne = nd;
+	nd = nds;
+	if (*nd == NULL)
+		return snprintf(page, PAGE_SIZE, "none\n");
+	for (p=page; nd<ne; nd++)
+		p += snprintf(p, PAGE_SIZE - (p-page), "%s%s",
+			p == page ? "" : ",", (*nd)->name);
+	p += snprintf(p, PAGE_SIZE - (p-page), "\n");
+	return p-page;
 }
 /* firmware version */
 static ssize_t aoedisk_show_fwver(struct gendisk * disk, char *page)
@@ -134,7 +167,23 @@ aoeblk_make_request(request_queue_t *q, struct bio *bio)
 
 	blk_queue_bounce(q, &bio);
 
+	if (bio == NULL) {
+		printk(KERN_ERR "aoe: bio is NULL\n");
+		BUG();
+		return 0;
+	}
 	d = bio->bi_bdev->bd_disk->private_data;
+	if (d == NULL) {
+		printk(KERN_ERR "aoe: bd_disk->private_data is NULL\n");
+		BUG();
+		bio_endio(bio, bio->bi_size, -ENXIO);
+		return 0;
+	} else if (bio->bi_io_vec == NULL) {
+		printk(KERN_ERR "aoe: bi_io_vec is NULL\n");
+		BUG();
+		bio_endio(bio, bio->bi_size, -ENXIO);
+		return 0;
+	}
 	buf = mempool_alloc(d->bufpool, GFP_NOIO);
 	if (buf == NULL) {
 		printk(KERN_INFO "aoe: buf allocation failure\n");
@@ -143,14 +192,14 @@ aoeblk_make_request(request_queue_t *q, struct bio *bio)
 	}
 	memset(buf, 0, sizeof(*buf));
 	INIT_LIST_HEAD(&buf->bufs);
-	buf->start_time = jiffies;
+	buf->stime = jiffies;
 	buf->bio = bio;
 	buf->resid = bio->bi_size;
 	buf->sector = bio->bi_sector;
 	buf->bv = &bio->bi_io_vec[bio->bi_idx];
-	WARN_ON(buf->bv->bv_len == 0);
 	buf->bv_resid = buf->bv->bv_len;
-	buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
+	WARN_ON(buf->bv_resid == 0);
+	buf->bv_off = buf->bv->bv_offset;
 
 	spin_lock_irqsave(&d->lock, flags);
 
@@ -234,7 +283,7 @@ aoeblk_gdalloc(void *vp)
 	gd->fops = &aoe_bdops;
 	gd->private_data = d;
 	gd->capacity = d->ssize;
-	snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%ld",
+	snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d",
 		d->aoemajor, d->aoeminor);
 
 	gd->queue = &d->blkq;
diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
index 39e563e..9026c44 100644
--- a/drivers/block/aoe/aoechr.c
+++ b/drivers/block/aoe/aoechr.c
@@ -6,6 +6,7 @@
 
 #include <linux/hdreg.h>
 #include <linux/blkdev.h>
+#include <linux/delay.h>
 #include "aoe.h"
 
 enum {
@@ -68,6 +69,7 @@ revalidate(const char __user *str, size_t size)
 	int major, minor, n;
 	ulong flags;
 	struct aoedev *d;
+	struct sk_buff *skb;
 	char buf[16];
 
 	if (size >= sizeof buf)
@@ -85,13 +87,17 @@ revalidate(const char __user *str, size_t size)
 	d = aoedev_by_aoeaddr(major, minor);
 	if (!d)
 		return -EINVAL;
-
 	spin_lock_irqsave(&d->lock, flags);
-	d->flags &= ~DEVFL_MAXBCNT;
-	d->flags |= DEVFL_PAUSE;
+	aoecmd_cleanslate(d);
+loop:
+	skb = aoecmd_ata_id(d);
 	spin_unlock_irqrestore(&d->lock, flags);
+	if (!skb && !msleep_interruptible(200)) {
+		spin_lock_irqsave(&d->lock, flags);
+		goto loop;
+	}
+	aoenet_xmit(skb);
 	aoecmd_cfg(major, minor);
-
 	return 0;
 }
 
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 01fbdd3..8a3a973 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -9,18 +9,15 @@
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/genhd.h>
+#include <linux/moduleparam.h>
 #include <asm/unaligned.h>
 #include "aoe.h"
 
-#define TIMERTICK (HZ / 10)
-#define MINTIMER (2 * TIMERTICK)
-#define MAXTIMER (HZ << 1)
-
 static int aoe_deadsecs = 60 * 3;
 module_param(aoe_deadsecs, int, 0644);
 MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");
 
-struct sk_buff *
+static struct sk_buff *
 new_skb(ulong len)
 {
 	struct sk_buff *skb;
@@ -42,12 +39,12 @@ new_skb(ulong len)
 }
 
 static struct frame *
-getframe(struct aoedev *d, int tag)
+getframe(struct aoetgt *t, int tag)
 {
 	struct frame *f, *e;
 
-	f = d->frames;
-	e = f + d->nframes;
+	f = t->frames;
+	e = f + t->nframes;
 	for (; f<e; f++)
 		if (f->tag == tag)
 			return f;
@@ -60,21 +57,21 @@ getframe(struct aoedev *d, int tag)
  * This driver reserves tag -1 to mean "unused frame."
  */
 static int
-newtag(struct aoedev *d)
+newtag(struct aoetgt *t)
 {
 	register ulong n;
 
 	n = jiffies & 0xffff;
-	return n |= (++d->lasttag & 0x7fff) << 16;
+	return n |= (++t->lasttag & 0x7fff) << 16;
 }
 
 static int
-aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
+aoehdr_atainit(struct aoedev *d, struct aoetgt *t, struct aoe_hdr *h)
 {
-	u32 host_tag = newtag(d);
+	u32 host_tag = newtag(t);
 
-	memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
-	memcpy(h->dst, d->addr, sizeof h->dst);
+	memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
+	memcpy(h->dst, t->addr, sizeof h->dst);
 	h->type = __constant_cpu_to_be16(ETH_P_AOE);
 	h->verfl = AOE_HVER;
 	h->major = cpu_to_be16(d->aoemajor);
@@ -97,42 +94,101 @@ put_lba(struct aoe_atahdr *ah, sector_t lba)
 }
 
 static void
-aoecmd_ata_rw(struct aoedev *d, struct frame *f)
+ifrotate(struct aoetgt *t)
+{
+	t->ifp++;
+	if (t->ifp >= &t->ifs[NAOEIFS] || t->ifp->nd == NULL)
+		t->ifp = t->ifs;
+	if (t->ifp->nd == NULL) {
+		printk(KERN_INFO "aoe: no interface to rotate to\n");
+		BUG();
+	}
+}
+
+static struct frame *
+freeframe(struct aoedev *d)
 {
+	struct frame *f, *e;
+	struct aoetgt **t;
+	ulong n;
+
+	if (d->targets[0] == NULL) {	/* shouldn't happen, but I'm paranoid */
+		printk(KERN_ERR "aoe: NULL TARGETS!\n");
+		return NULL;
+	}
+	t = d->targets;
+	do {
+		if (t != d->htgt)
+		if ((*t)->ifp->nd)
+		if ((*t)->nout < (*t)->maxout) {
+			n = (*t)->nframes;
+			f = (*t)->frames;
+			e = f + n;
+			for (; f<e; f++) {
+				if (f->tag != FREETAG)
+					continue;
+				if (atomic_read(&skb_shinfo(f->skb)->dataref) != 1) {
+					n--;
+					continue;
+				}
+				skb_shinfo(f->skb)->nr_frags = f->skb->data_len = 0;
+				skb_trim(f->skb, 0);
+				d->tgt = t;
+				ifrotate(*t);
+				return f;
+			}
+			if (n == 0)	/* slow polling network card */
+				d->flags |= DEVFL_KICKME;
+		}
+		t++;
+	} while (t < &d->targets[NTARGETS] && *t);
+	return NULL;
+}
+
+static int
+aoecmd_ata_rw(struct aoedev *d)
+{
+	struct frame *f;
 	struct aoe_hdr *h;
 	struct aoe_atahdr *ah;
 	struct buf *buf;
+	struct bio_vec *bv;
+	struct aoetgt *t;
 	struct sk_buff *skb;
 	ulong bcnt;
-	register sector_t sector;
 	char writebit, extbit;
 
 	writebit = 0x10;
 	extbit = 0x4;
 
+	f = freeframe(d);
+	if (f == NULL)
+		return 0;
+	t = *d->tgt;
 	buf = d->inprocess;
-
-	sector = buf->sector;
-	bcnt = buf->bv_resid;
-	if (bcnt > d->maxbcnt)
-		bcnt = d->maxbcnt;
-
+	bv = buf->bv;
+	bcnt = t->ifp->maxbcnt;
+	if (bcnt == 0)
+		bcnt = DEFAULTBCNT;
+	if (bcnt > buf->bv_resid)
+		bcnt = buf->bv_resid;
 	/* initialize the headers & frame */
 	skb = f->skb;
 	h = aoe_hdr(skb);
 	ah = (struct aoe_atahdr *) (h+1);
 	skb_put(skb, sizeof *h + sizeof *ah);
 	memset(h, 0, skb->len);
-	f->tag = aoehdr_atainit(d, h);
+	f->tag = aoehdr_atainit(d, t, h);
+	t->nout++;
 	f->waited = 0;
 	f->buf = buf;
-	f->bufaddr = buf->bufaddr;
+	f->bufaddr = page_address(bv->bv_page) + buf->bv_off;
 	f->bcnt = bcnt;
-	f->lba = sector;
+	f->lba = buf->sector;
 
 	/* set up ata header */
 	ah->scnt = bcnt >> 9;
-	put_lba(ah, sector);
+	put_lba(ah, buf->sector);
 	if (d->flags & DEVFL_EXT) {
 		ah->aflags |= AOEAFL_EXT;
 	} else {
@@ -140,14 +196,14 @@ aoecmd_ata_rw(struct aoedev *d, struct frame *f)
 		ah->lba3 &= 0x0f;
 		ah->lba3 |= 0xe0;	/* LBA bit + obsolete 0xa0 */
 	}
-
 	if (bio_data_dir(buf->bio) == WRITE) {
-		skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
-			offset_in_page(f->bufaddr), bcnt);
+		skb_fill_page_desc(skb, 0, bv->bv_page, buf->bv_off, bcnt);
 		ah->aflags |= AOEAFL_WRITE;
 		skb->len += bcnt;
 		skb->data_len = bcnt;
+		t->wpkts++;
 	} else {
+		t->rpkts++;
 		writebit = 0;
 	}
 
@@ -155,29 +211,29 @@ aoecmd_ata_rw(struct aoedev *d, struct frame *f)
 
 	/* mark all tracking fields and load out */
 	buf->nframesout += 1;
-	buf->bufaddr += bcnt;
+	buf->bv_off += bcnt;
 	buf->bv_resid -= bcnt;
-/* printk(KERN_DEBUG "aoe: bv_resid=%ld\n", buf->bv_resid); */
 	buf->resid -= bcnt;
 	buf->sector += bcnt >> 9;
 	if (buf->resid == 0) {
 		d->inprocess = NULL;
 	} else if (buf->bv_resid == 0) {
-		buf->bv++;
-		WARN_ON(buf->bv->bv_len == 0);
-		buf->bv_resid = buf->bv->bv_len;
-		buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
+		buf->bv = ++bv;
+		buf->bv_resid = bv->bv_len;
+		WARN_ON(buf->bv_resid == 0);
+		buf->bv_off = bv->bv_offset;
 	}
 
-	skb->dev = d->ifp;
+	skb->dev = t->ifp->nd;
 	skb = skb_clone(skb, GFP_ATOMIC);
-	if (skb == NULL)
-		return;
-	if (d->sendq_hd)
-		d->sendq_tl->next = skb;
-	else
-		d->sendq_hd = skb;
-	d->sendq_tl = skb;
+	if (skb) {
+		if (d->sendq_hd)
+			d->sendq_tl->next = skb;
+		else
+			d->sendq_hd = skb;
+		d->sendq_tl = skb;
+	}
+	return 1;
 }
 
 /* some callers cannot sleep, and they can call this function,
@@ -231,62 +287,8 @@ cont:
 	return sl;
 }
 
-static struct frame *
-freeframe(struct aoedev *d)
-{
-	struct frame *f, *e;
-	int n = 0;
-
-	f = d->frames;
-	e = f + d->nframes;
-	for (; f<e; f++) {
-		if (f->tag != FREETAG)
-			continue;
-		if (atomic_read(&skb_shinfo(f->skb)->dataref) == 1) {
-			skb_shinfo(f->skb)->nr_frags = f->skb->data_len = 0;
-			skb_trim(f->skb, 0);
-			return f;
-		}
-		n++;
-	}
-	if (n == d->nframes)	/* wait for network layer */
-		d->flags |= DEVFL_KICKME;
-
-	return NULL;
-}
-
-/* enters with d->lock held */
-void
-aoecmd_work(struct aoedev *d)
-{
-	struct frame *f;
-	struct buf *buf;
-
-	if (d->flags & DEVFL_PAUSE) {
-		if (!aoedev_isbusy(d))
-			d->sendq_hd = aoecmd_cfg_pkts(d->aoemajor,
-						d->aoeminor, &d->sendq_tl);
-		return;
-	}
-
-loop:
-	f = freeframe(d);
-	if (f == NULL)
-		return;
-	if (d->inprocess == NULL) {
-		if (list_empty(&d->bufq))
-			return;
-		buf = container_of(d->bufq.next, struct buf, bufs);
-		list_del(d->bufq.next);
-/*printk(KERN_DEBUG "aoe: bi_size=%ld\n", buf->bio->bi_size); */
-		d->inprocess = buf;
-	}
-	aoecmd_ata_rw(d, f);
-	goto loop;
-}
-
 static void
-rexmit(struct aoedev *d, struct frame *f)
+resend(struct aoedev *d, struct aoetgt *t, struct frame *f)
 {
 	struct sk_buff *skb;
 	struct aoe_hdr *h;
@@ -294,41 +296,44 @@ rexmit(struct aoedev *d, struct frame *f)
 	char buf[128];
 	u32 n;
 
-	n = newtag(d);
+	ifrotate(t);
+	n = newtag(t);
+	skb = f->skb;
+	h = aoe_hdr(skb);
+	ah = (struct aoe_atahdr *) (h+1);
 
 	snprintf(buf, sizeof buf,
-		"%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
-		"retransmit",
-		d->aoemajor, d->aoeminor, f->tag, jiffies, n);
+		"%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%012llx d=%012llx nout=%d\n",
+		"retransmit", d->aoemajor, d->aoeminor, f->tag, jiffies, n,
+		mac_addr(h->src), mac_addr(h->dst), t->nout);
 	aoechr_error(buf);
 
-	skb = f->skb;
-	h = aoe_hdr(skb);
-	ah = (struct aoe_atahdr *) (h+1);
 	f->tag = n;
 	h->tag = cpu_to_be32(n);
-	memcpy(h->dst, d->addr, sizeof h->dst);
-	memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
-
-	n = DEFAULTBCNT / 512;
-	if (ah->scnt > n) {
-		ah->scnt = n;
+	memcpy(h->dst, t->addr, sizeof h->dst);
+	memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
+
+	switch (ah->cmdstat) {
+	default:
+		break;
+	case WIN_READ:
+	case WIN_READ_EXT:
+	case WIN_WRITE:
+	case WIN_WRITE_EXT:
+		put_lba(ah, f->lba);
+
+		n = f->bcnt;
+		if (n > DEFAULTBCNT)
+			n = DEFAULTBCNT;
+		ah->scnt = n >> 9;
 		if (ah->aflags & AOEAFL_WRITE) {
 			skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
-				offset_in_page(f->bufaddr), DEFAULTBCNT);
-			skb->len = sizeof *h + sizeof *ah + DEFAULTBCNT;
-			skb->data_len = DEFAULTBCNT;
-		}
-		if (++d->lostjumbo > (d->nframes << 1))
-		if (d->maxbcnt != DEFAULTBCNT) {
-			printk(KERN_INFO "aoe: e%ld.%ld: too many lost jumbo on %s - using 1KB frames.\n",
-				d->aoemajor, d->aoeminor, d->ifp->name);
-			d->maxbcnt = DEFAULTBCNT;
-			d->flags |= DEVFL_MAXBCNT;
+				offset_in_page(f->bufaddr), n);
+			skb->len = sizeof *h + sizeof *ah + n;
+			skb->data_len = n;
 		}
 	}
-
-	skb->dev = d->ifp;
+	skb->dev = t->ifp->nd;
 	skb = skb_clone(skb, GFP_ATOMIC);
 	if (skb == NULL)
 		return;
@@ -351,10 +356,83 @@ tsince(int tag)
 	return n;
 }
 
+static struct aoeif *
+getif(struct aoetgt *t, struct net_device *nd)
+{
+	struct aoeif *p, *e;
+
+	p = t->ifs;
+	e = p + NAOEIFS;
+	for (; p<e; p++)
+		if (p->nd == nd)
+			return p;
+	return NULL;
+}
+
+static struct aoeif *
+addif(struct aoetgt *t, struct net_device *nd)
+{
+	struct aoeif *p;
+
+	p = getif(t, NULL);
+	if (!p)
+		return NULL;
+	p->nd = nd;
+	p->maxbcnt = DEFAULTBCNT;
+	p->lost = p->lostjumbo = 0;
+	return p;
+}
+
+static void
+ejectif(struct aoetgt *t, struct aoeif *ifp)
+{
+	struct aoeif *e;
+	ulong n;
+
+	e = t->ifs + NAOEIFS - 1;
+	n = (e - ifp) * sizeof *ifp;
+	memmove(ifp, ifp+1, n);
+	e->nd = NULL;
+}
+
+static int
+sthtith(struct aoedev *d)
+{
+	struct frame *f, *e, *nf;
+	struct sk_buff *skb;
+	struct aoetgt *ht = *d->htgt;
+
+	f = ht->frames;
+	e = f + ht->nframes;
+	for (; f<e; f++) {
+		if (f->tag == FREETAG)
+			continue;
+		nf = freeframe(d);
+		if (!nf)
+			return 0;
+		skb = nf->skb;
+		*nf = *f;
+		f->skb = skb;
+		f->tag = FREETAG;
+		nf->waited = 0;
+		ht->nout--;
+		(*d->tgt)->nout++;
+		resend(d, *d->tgt, nf);
+	}
+	/* he's clean, he's useless.  take away his interfaces */
+	memset(ht->ifs, 0, sizeof ht->ifs);
+	d->htgt = NULL;
+	return 1;
+}
+
+#define ATASCNT(raw) (((struct aoe_atahdr *)(((struct aoe_hdr *)raw)+1))->scnt)
+
 static void
 rexmit_timer(ulong vp)
 {
 	struct aoedev *d;
+	struct aoetgt *t, **tt, **te;
+	struct aoeif *ifp;
 	struct frame *f, *e;
 	struct sk_buff *sl;
 	register long timeout;
@@ -373,31 +451,75 @@ rexmit_timer(ulong vp)
 		spin_unlock_irqrestore(&d->lock, flags);
 		return;
 	}
-	f = d->frames;
-	e = f + d->nframes;
-	for (; f<e; f++) {
-		if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
+	tt = d->targets;
+	te = tt + NTARGETS;
+	for (; tt<te && *tt; tt++) {
+		t = *tt;
+		f = t->frames;
+		e = f + t->nframes;
+		for (; f<e; f++) {
+			if (f->tag == FREETAG
+			|| tsince(f->tag) < timeout)
+				continue;
 			n = f->waited += timeout;
 			n /= HZ;
-			if (n > aoe_deadsecs) { /* waited too long for response */
+			if (n > aoe_deadsecs) { /* waited too long.  device failure. */
 				aoedev_downdev(d);
 				break;
 			}
-			rexmit(d, f);
+
+			if (n > HELPWAIT) /* see if another target can help */
+			if (tt != d->targets || d->targets[1])
+				d->htgt = tt;
+
+			if (t->nout == t->maxout) {
+				if (t->maxout > 1)
+					t->maxout--;
+				t->lastwadj = jiffies;
+			}
+
+			ifp = getif(t, f->skb->dev);
+			if (ifp && ++ifp->lost > (t->nframes << 1))
+			if (ifp != t->ifs || t->ifs[1].nd) {
+				ejectif(t, ifp);
+				ifp = NULL;
+			}
+
+			if (ATASCNT(aoe_hdr(f->skb)) > DEFAULTBCNT / 512)
+			if (ifp && ++ifp->lostjumbo > (t->nframes << 1))
+			if (ifp->maxbcnt != DEFAULTBCNT) {
+				printk(KERN_INFO "aoe: e%ld.%d: too many lost jumbo on %s:%012llx - "
+					"falling back to %d frames.\n",
+					d->aoemajor, d->aoeminor,
+					ifp->nd->name, mac_addr(t->addr),
+					DEFAULTBCNT);
+				ifp->maxbcnt = 0;
+			}
+			resend(d, t, f);
+		}
+
+		/* window check */
+		if (t->nout == t->maxout)
+		if (t->maxout < t->nframes)
+		if ((jiffies - t->lastwadj)/HZ > 10) {
+			t->maxout++;
+			t->lastwadj = jiffies;
 		}
 	}
-	if (d->flags & DEVFL_KICKME) {
+
+	if (d->sendq_hd) {
+		n = d->rttavg <<= 1;
+		if (n > MAXTIMER)
+			d->rttavg = MAXTIMER;
+	}
+
+	if (d->flags & DEVFL_KICKME || d->htgt) {
 		d->flags &= ~DEVFL_KICKME;
 		aoecmd_work(d);
 	}
 
 	sl = d->sendq_hd;
 	d->sendq_hd = d->sendq_tl = NULL;
-	if (sl) {
-		n = d->rttavg <<= 1;
-		if (n > MAXTIMER)
-			d->rttavg = MAXTIMER;
-	}
 
 	d->timer.expires = jiffies + TIMERTICK;
 	add_timer(&d->timer);
@@ -407,6 +529,25 @@ rexmit_timer(ulong vp)
 	aoenet_xmit(sl);
 }
 
+/* enters with d->lock held */
+void
+aoecmd_work(struct aoedev *d)
+{
+	struct buf *buf;
+loop:
+	if (d->htgt && !sthtith(d))
+		return;
+	if (d->inprocess == NULL) {
+		if (list_empty(&d->bufq))
+			return;
+		buf = container_of(d->bufq.next, struct buf, bufs);
+		list_del(d->bufq.next);
+		d->inprocess = buf;
+	}
+	if (aoecmd_ata_rw(d))
+		goto loop;
+}
+
 /* this function performs work that has been deferred until sleeping is OK
  */
 void
@@ -439,7 +580,7 @@ aoecmd_sleepwork(struct work_struct *work)
 }
 
 static void
-ataid_complete(struct aoedev *d, unsigned char *id)
+ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
 {
 	u64 ssize;
 	u16 n;
@@ -475,7 +616,7 @@ ataid_complete(struct aoedev *d, unsigned char *id)
 
 	if (d->ssize != ssize)
 		printk(KERN_INFO "aoe: %012llx e%lu.%lu v%04x has %llu sectors\n",
-			(unsigned long long)mac_addr(d->addr),
+			(unsigned long long)mac_addr(t->addr),
 			d->aoemajor, d->aoeminor,
 			d->fw_ver, (long long)ssize);
 	d->ssize = ssize;
@@ -483,15 +624,8 @@ ataid_complete(struct aoedev *d, unsigned char *id)
 	if (d->gd != NULL) {
 		d->gd->capacity = ssize;
 		d->flags |= DEVFL_NEWSIZE;
-	} else {
-		if (d->flags & DEVFL_GDALLOC) {
-			printk(KERN_ERR "aoe: can't schedule work for e%lu.%lu, %s\n",
-			       d->aoemajor, d->aoeminor,
-			       "it's already on!  This shouldn't happen.\n");
-			return;
-		}
+	} else
 		d->flags |= DEVFL_GDALLOC;
-	}
 	schedule_work(&d->work);
 }
 
@@ -518,6 +652,31 @@ calc_rttavg(struct aoedev *d, int rtt)
 	d->rttavg += n >> 2;
 }
 
+static struct aoetgt *
+gettgt(struct aoedev *d, char *addr)
+{
+	struct aoetgt **t, **e;
+
+	t = d->targets;
+	e = t + NTARGETS;
+	for(; t<e && *t; t++)
+		if (memcmp((*t)->addr, addr, sizeof (*t)->addr) == 0)
+			return *t;
+	return NULL;
+}
+
+static inline void
+diskstats(struct gendisk *disk, struct bio *bio, ulong duration)
+{
+	unsigned long n_sect = bio->bi_size >> 9;
+	const int rw = bio_data_dir(bio);
+
+	disk_stat_inc(disk, ios[rw]);
+	disk_stat_add(disk, ticks[rw], duration);
+	disk_stat_add(disk, sectors[rw], n_sect);
+	disk_stat_add(disk, io_ticks, duration);
+}
+
 void
 aoecmd_ata_rsp(struct sk_buff *skb)
 {
@@ -527,6 +686,8 @@ aoecmd_ata_rsp(struct sk_buff *skb)
 	struct frame *f;
 	struct buf *buf;
 	struct sk_buff *sl;
+	struct aoetgt *t;
+	struct aoeif *ifp;
 	register long n;
 	ulong flags;
 	char ebuf[128];
@@ -546,7 +707,15 @@ aoecmd_ata_rsp(struct sk_buff *skb)
 	spin_lock_irqsave(&d->lock, flags);
 
 	n = be32_to_cpu(get_unaligned(&hin->tag));
-	f = getframe(d, n);
+	t = gettgt(d, hin->src);
+	if (t == NULL) {
+		printk(KERN_INFO "aoe: can't find target e%ld.%d:%012llx\n",
+			d->aoemajor, d->aoeminor,
+			(unsigned long long) mac_addr(hin->src));
+		spin_unlock_irqrestore(&d->lock, flags);
+		return;
+	}
+	f = getframe(t, n);
 	if (f == NULL) {
 		calc_rttavg(d, -tsince(n));
 		spin_unlock_irqrestore(&d->lock, flags);
@@ -568,8 +737,6 @@ aoecmd_ata_rsp(struct sk_buff *skb)
 	ahout = (struct aoe_atahdr *) (hout+1);
 	buf = f->buf;
 
-	if (ahout->cmdstat == WIN_IDENTIFY)
-		d->flags &= ~DEVFL_PAUSE;
 	if (ahin->cmdstat & 0xa9) {	/* these bits cleared on success */
 		printk(KERN_ERR
 			"aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%ld\n",
@@ -578,14 +745,16 @@ aoecmd_ata_rsp(struct sk_buff *skb)
 		if (buf)
 			buf->flags |= BUFFL_FAIL;
 	} else {
+		if (d->htgt && t == *d->htgt) /* I'll help myself, thank you. */
+			d->htgt = NULL;
 		n = ahout->scnt << 9;
 		switch (ahout->cmdstat) {
 		case WIN_READ:
 		case WIN_READ_EXT:
 			if (skb->len - sizeof *hin - sizeof *ahin < n) {
 				printk(KERN_ERR
-					"aoe: runt data size in read.  skb->len=%d\n",
-					skb->len);
+					"aoe: %s.  skb->len=%d need=%ld\n",
+					"runt data size in read", skb->len, n);
 				/* fail frame f?  just returning will rexmit. */
 				spin_unlock_irqrestore(&d->lock, flags);
 				return;
@@ -593,32 +762,18 @@ aoecmd_ata_rsp(struct sk_buff *skb)
 			memcpy(f->bufaddr, ahin+1, n);
 		case WIN_WRITE:
 		case WIN_WRITE_EXT:
+			ifp = getif(t, skb->dev);
+			if (ifp) {
+				ifp->lost = 0;
+				if (n > DEFAULTBCNT)
+					ifp->lostjumbo = 0;
+			}
 			if (f->bcnt -= n) {
-				skb = f->skb;
+				f->lba += n >> 9;
 				f->bufaddr += n;
-				put_lba(ahout, f->lba += ahout->scnt);
-				n = f->bcnt;
-				if (n > DEFAULTBCNT)
-					n = DEFAULTBCNT;
-				ahout->scnt = n >> 9;
-				if (ahout->aflags & AOEAFL_WRITE) {
-					skb_fill_page_desc(skb, 0,
-						virt_to_page(f->bufaddr),
-						offset_in_page(f->bufaddr), n);
-					skb->len = sizeof *hout + sizeof *ahout + n;
-					skb->data_len = n;
-				}
-				f->tag = newtag(d);
-				hout->tag = cpu_to_be32(f->tag);
-				skb->dev = d->ifp;
-				skb = skb_clone(skb, GFP_ATOMIC);
-				spin_unlock_irqrestore(&d->lock, flags);
-				if (skb)
-					aoenet_xmit(skb);
-				return;
+				resend(d, t, f);
+				goto xmit;
 			}
-			if (n > DEFAULTBCNT)
-				d->lostjumbo = 0;
 			break;
 		case WIN_IDENTIFY:
 			if (skb->len - sizeof *hin - sizeof *ahin < 512) {
@@ -628,7 +783,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
 				spin_unlock_irqrestore(&d->lock, flags);
 				return;
 			}
-			ataid_complete(d, (char *) (ahin+1));
+			ataid_complete(d, t, (char *) (ahin+1));
 			break;
 		default:
 			printk(KERN_INFO
@@ -639,28 +794,19 @@ aoecmd_ata_rsp(struct sk_buff *skb)
 		}
 	}
 
-	if (buf) {
-		buf->nframesout -= 1;
-		if (buf->nframesout == 0 && buf->resid == 0) {
-			unsigned long duration = jiffies - buf->start_time;
-			unsigned long n_sect = buf->bio->bi_size >> 9;
-			struct gendisk *disk = d->gd;
-			const int rw = bio_data_dir(buf->bio);
-
-			disk_stat_inc(disk, ios[rw]);
-			disk_stat_add(disk, ticks[rw], duration);
-			disk_stat_add(disk, sectors[rw], n_sect);
-			disk_stat_add(disk, io_ticks, duration);
-			n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
-			bio_endio(buf->bio, buf->bio->bi_size, n);
-			mempool_free(buf, d->bufpool);
-		}
+	if (buf && --buf->nframesout == 0 && buf->resid == 0) {
+		diskstats(d->gd, buf->bio, jiffies - buf->stime);
+		n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
+		bio_endio(buf->bio, buf->bio->bi_size, n);
+		mempool_free(buf, d->bufpool);
 	}
 
 	f->buf = NULL;
 	f->tag = FREETAG;
+	t->nout--;
 
 	aoecmd_work(d);
+xmit:
 	sl = d->sendq_hd;
 	d->sendq_hd = d->sendq_tl = NULL;
 
@@ -678,23 +824,20 @@ aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
 	aoenet_xmit(sl);
 }
  
-/*
- * Since we only call this in one place (and it only prepares one frame)
- * we just return the skb.  Usually we'd chain it up to the aoedev sendq.
- */
-static struct sk_buff *
+struct sk_buff *
 aoecmd_ata_id(struct aoedev *d)
 {
 	struct aoe_hdr *h;
 	struct aoe_atahdr *ah;
 	struct frame *f;
 	struct sk_buff *skb;
+	struct aoetgt *t;
 
 	f = freeframe(d);
-	if (f == NULL) {
-		printk(KERN_ERR "aoe: can't get a frame. This shouldn't happen.\n");
+	if (f == NULL)
 		return NULL;
-	}
+
+	t = *d->tgt;
 
 	/* initialize the headers & frame */
 	skb = f->skb;
@@ -702,7 +845,8 @@ aoecmd_ata_id(struct aoedev *d)
 	ah = (struct aoe_atahdr *) (h+1);
 	skb_put(skb, sizeof *h + sizeof *ah);
 	memset(h, 0, skb->len);
-	f->tag = aoehdr_atainit(d, h);
+	f->tag = aoehdr_atainit(d, t, h);
+	t->nout++;
 	f->waited = 0;
 
 	/* set up ata header */
@@ -710,7 +854,7 @@ aoecmd_ata_id(struct aoedev *d)
 	ah->cmdstat = WIN_IDENTIFY;
 	ah->lba3 = 0xa0;
 
-	skb->dev = d->ifp;
+	skb->dev = t->ifp->nd;
 
 	d->rttavg = MAXTIMER;
 	d->timer.function = rexmit_timer;
@@ -718,12 +862,66 @@ aoecmd_ata_id(struct aoedev *d)
 	return skb_clone(skb, GFP_ATOMIC);
 }
  
+static struct aoetgt *
+addtgt(struct aoedev *d, char *addr, ulong nframes)
+{
+	struct aoetgt *t, **tt, **te;
+	struct frame *f, *e;
+
+	tt = d->targets;
+	te = tt + NTARGETS;
+	for (; tt<te; tt++) {
+		if (*tt == NULL)
+			break;
+		else if (memcmp((*tt)->addr, addr, 6) > 0) {
+			memmove(tt+1, tt, (void *)te-(void *)(tt+1));
+			break;
+		}
+	}
+	if (tt == te)
+		return NULL;
+
+	t = kcalloc(1, sizeof *t, GFP_ATOMIC);
+	f = kcalloc(nframes, sizeof *f, GFP_ATOMIC);
+	switch (!t || !f) {
+	case 0:
+		t->nframes = nframes;
+		t->frames = f;
+		e = f + nframes;
+		for (; f<e; f++) {
+			f->tag = FREETAG;
+			f->skb = new_skb(ETH_ZLEN);
+			if (!f->skb)
+				break;
+		}
+		if (f == e)
+			break;
+		while (f > t->frames) {
+			f--;
+			dev_kfree_skb(f->skb);
+		}
+	default:
+		if (f)
+			kfree(f);
+		if (t)
+			kfree(t);
+		return NULL;
+	}
+
+	memcpy(t->addr, addr, sizeof t->addr);
+	t->ifp = t->ifs;
+	t->maxout = t->nframes;
+	return *tt = t;
+}
+
 void
 aoecmd_cfg_rsp(struct sk_buff *skb)
 {
 	struct aoedev *d;
 	struct aoe_hdr *h;
 	struct aoe_cfghdr *ch;
+	struct aoetgt *t;
+	struct aoeif *ifp;
 	ulong flags, sysminor, aoemajor;
 	struct sk_buff *sl;
 	enum { MAXFRAMES = 16 };
@@ -754,7 +952,7 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
 	if (n > MAXFRAMES)	/* keep it reasonable */
 		n = MAXFRAMES;
 
-	d = aoedev_by_sysminor_m(sysminor, n);
+	d = aoedev_by_sysminor_m(sysminor);
 	if (d == NULL) {
 		printk(KERN_INFO "aoe: device sysminor_m failure\n");
 		return;
@@ -762,38 +960,70 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
 
 	spin_lock_irqsave(&d->lock, flags);
 
-	/* permit device to migrate mac and network interface */
-	d->ifp = skb->dev;
-	memcpy(d->addr, h->src, sizeof d->addr);
-	if (!(d->flags & DEVFL_MAXBCNT)) {
-		n = d->ifp->mtu;
+	t = gettgt(d, h->src);
+	if (!t) {
+		t = addtgt(d, h->src, n);
+		if (!t) {
+			printk(KERN_INFO "aoe: device addtgt failure; too many targets?\n");
+			spin_unlock_irqrestore(&d->lock, flags);
+			return;
+		}
+	}
+	ifp = getif(t, skb->dev);
+	if (!ifp) {
+		if (!(ifp = addif(t, skb->dev))) {
+			printk(KERN_INFO "aoe: device addif failure; too many interfaces?\n");
+			spin_unlock_irqrestore(&d->lock, flags);
+			return;
+		}
+	}
+	if (ifp->maxbcnt) {
+		n = ifp->nd->mtu;
 		n -= sizeof (struct aoe_hdr) + sizeof (struct aoe_atahdr);
 		n /= 512;
 		if (n > ch->scnt)
 			n = ch->scnt;
 		n = n ? n * 512 : DEFAULTBCNT;
-		if (n != d->maxbcnt) {
+		if (n != ifp->maxbcnt) {
 			printk(KERN_INFO
-				"aoe: e%ld.%ld: setting %d byte data frames on %s\n",
-				d->aoemajor, d->aoeminor, n, d->ifp->name);
-			d->maxbcnt = n;
+				"aoe: e%ld.%d: setting %d byte data frames on %s:%012llx\n",
+				d->aoemajor, d->aoeminor, n, ifp->nd->name,
+				(unsigned long long) mac_addr(t->addr));
+			ifp->maxbcnt = n;
 		}
 	}
 
 	/* don't change users' perspective */
-	if (d->nopen && !(d->flags & DEVFL_PAUSE)) {
+	if (d->nopen) {
 		spin_unlock_irqrestore(&d->lock, flags);
 		return;
 	}
-	d->flags |= DEVFL_PAUSE;	/* force pause */
-	d->mintimer = MINTIMER;
 	d->fw_ver = be16_to_cpu(ch->fwver);
 
-	/* check for already outstanding ataid */
-	sl = aoedev_isbusy(d) == 0 ? aoecmd_ata_id(d) : NULL;
+	sl = aoecmd_ata_id(d);
 
 	spin_unlock_irqrestore(&d->lock, flags);
 
 	aoenet_xmit(sl);
 }
 
+void
+aoecmd_cleanslate(struct aoedev *d)
+{
+	struct aoetgt **t, **te;
+	struct aoeif *p, *e;
+
+	d->mintimer = MINTIMER;
+
+	t = d->targets;
+	te = t + NTARGETS;
+	for (; t<te && *t; t++) {
+		(*t)->maxout = (*t)->nframes;
+		p = (*t)->ifs;
+		e = p + NAOEIFS;
+		for (; p<e; p++) {
+			p->lostjumbo = p->lost = 0;
+			p->maxbcnt = DEFAULTBCNT;
+		}
+	}
+}
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index 05a9719..04c75b4 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -15,15 +15,18 @@ static spinlock_t devlist_lock;
 int
 aoedev_isbusy(struct aoedev *d)
 {
+	struct aoetgt **t, **te;
 	struct frame *f, *e;
 
-	f = d->frames;
-	e = f + d->nframes;
-	do {
-		if (f->tag != FREETAG)
-			return 1;
-	} while (++f < e);
-
+	t = d->targets;
+	te = t + NTARGETS;
+	for (; t<te && *t; t++) {
+		f = (*t)->frames;
+		e = f + (*t)->nframes;
+		for (; f<e; f++)
+			if (f->tag != FREETAG)
+				return 1;
+	}
 	return 0;
 }
 
@@ -55,75 +58,41 @@ dummy_timer(ulong vp)
 	add_timer(&d->timer);
 }
 
-/* called with devlist lock held */
-static struct aoedev *
-aoedev_newdev(ulong nframes)
-{
-	struct aoedev *d;
-	struct frame *f, *e;
-
-	d = kzalloc(sizeof *d, GFP_ATOMIC);
-	f = kcalloc(nframes, sizeof *f, GFP_ATOMIC);
- 	switch (!d || !f) {
- 	case 0:
- 		d->nframes = nframes;
- 		d->frames = f;
- 		e = f + nframes;
- 		for (; f<e; f++) {
- 			f->tag = FREETAG;
- 			f->skb = new_skb(ETH_ZLEN);
- 			if (!f->skb)
- 				break;
- 		}
- 		if (f == e)
- 			break;
- 		while (f > d->frames) {
- 			f--;
- 			dev_kfree_skb(f->skb);
- 		}
- 	default:
- 		if (f)
- 			kfree(f);
- 		if (d)
- 			kfree(d);
-		return NULL;
-	}
-	INIT_WORK(&d->work, aoecmd_sleepwork);
-	spin_lock_init(&d->lock);
-	init_timer(&d->timer);
-	d->timer.data = (ulong) d;
-	d->timer.function = dummy_timer;
-	d->timer.expires = jiffies + HZ;
-	add_timer(&d->timer);
-	d->bufpool = NULL;	/* defer to aoeblk_gdalloc */
-	INIT_LIST_HEAD(&d->bufq);
-	d->next = devlist;
-	devlist = d;
-
-	return d;
-}
-
 void
 aoedev_downdev(struct aoedev *d)
 {
+	struct aoetgt **t, **te;
 	struct frame *f, *e;
 	struct buf *buf;
 	struct bio *bio;
 
-	f = d->frames;
-	e = f + d->nframes;
-	for (; f<e; f->tag = FREETAG, f->buf = NULL, f++) {
-		if (f->tag == FREETAG || f->buf == NULL)
-			continue;
-		buf = f->buf;
-		bio = buf->bio;
-		if (--buf->nframesout == 0) {
-			mempool_free(buf, d->bufpool);
-			bio_endio(bio, bio->bi_size, -EIO);
+	t = d->targets;
+	te = t + NTARGETS;
+	for (; t<te && *t; t++) {
+		f = (*t)->frames;
+		e = f + (*t)->nframes;
+		for (; f<e; f->tag = FREETAG, f->buf = NULL, f++) {
+			if (f->tag == FREETAG || f->buf == NULL)
+				continue;
+			buf = f->buf;
+			bio = buf->bio;
+			if (--buf->nframesout == 0)
+			if (buf != d->inprocess) {
+				mempool_free(buf, d->bufpool);
+				bio_endio(bio, bio->bi_size, -EIO);
+			}
 		}
-		skb_shinfo(f->skb)->nr_frags = f->skb->data_len = 0;
+		(*t)->maxout = (*t)->nframes;
+		(*t)->nout = 0;
+	}
+	buf = d->inprocess;
+	if (buf) {
+		bio = buf->bio;
+		mempool_free(buf, d->bufpool);
+		bio_endio(bio, bio->bi_size, -EIO);
 	}
 	d->inprocess = NULL;
+	d->htgt = NULL;
 
 	while (!list_empty(&d->bufq)) {
 		buf = container_of(d->bufq.next, struct buf, bufs);
@@ -136,12 +105,12 @@ aoedev_downdev(struct aoedev *d)
 	if (d->gd)
 		d->gd->capacity = 0;
 
-	d->flags &= ~(DEVFL_UP | DEVFL_PAUSE);
+	d->flags &= ~DEVFL_UP;
 }
 
 /* find it or malloc it */
 struct aoedev *
-aoedev_by_sysminor_m(ulong sysminor, ulong bufcnt)
+aoedev_by_sysminor_m(ulong sysminor)
 {
 	struct aoedev *d;
 	ulong flags;
@@ -151,40 +120,60 @@ aoedev_by_sysminor_m(ulong sysminor, ulong bufcnt)
 	for (d=devlist; d; d=d->next)
 		if (d->sysminor == sysminor)
 			break;
-
-	if (d == NULL) {
-		d = aoedev_newdev(bufcnt);
-	 	if (d == NULL) {
-			spin_unlock_irqrestore(&devlist_lock, flags);
-			printk(KERN_INFO "aoe: aoedev_newdev failure.\n");
-			return NULL;
-		}
-		d->sysminor = sysminor;
-		d->aoemajor = AOEMAJOR(sysminor);
-		d->aoeminor = AOEMINOR(sysminor);
+	if (d || !(d = kcalloc(1, sizeof *d, GFP_ATOMIC))) {
+		spin_unlock_irqrestore(&devlist_lock, flags);
+		return d;
 	}
+	INIT_WORK(&d->work, aoecmd_sleepwork);
+	spin_lock_init(&d->lock);
+	init_timer(&d->timer);
+	d->timer.data = (ulong) d;
+	d->timer.function = dummy_timer;
+	d->timer.expires = jiffies + HZ;
+	add_timer(&d->timer);
+	d->bufpool = NULL;	/* defer to aoeblk_gdalloc */
+	d->tgt = d->targets;
+	INIT_LIST_HEAD(&d->bufq);
+	d->sysminor = sysminor;
+	d->aoemajor = AOEMAJOR(sysminor);
+	d->aoeminor = AOEMINOR(sysminor);
+	d->mintimer = MINTIMER;
+	d->next = devlist;
+	devlist = d;
 
 	spin_unlock_irqrestore(&devlist_lock, flags);
 	return d;
 }
 
 static void
-aoedev_freedev(struct aoedev *d)
+freetgt(struct aoetgt *t)
 {
 	struct frame *f, *e;
 
+	f = t->frames;
+	e = f + t->nframes;
+	for (; f<e; f++) {
+		skb_shinfo(f->skb)->nr_frags = 0;
+		dev_kfree_skb(f->skb);
+	}
+	kfree(t->frames);
+	kfree(t);
+}
+
+static void
+aoedev_freedev(struct aoedev *d)
+{
+	struct aoetgt **t, **e;
+
 	if (d->gd) {
 		aoedisk_rm_sysfs(d);
 		del_gendisk(d->gd);
 		put_disk(d->gd);
 	}
-	f = d->frames;
-	e = f + d->nframes;
-	for (; f<e; f++) {
-		skb_shinfo(f->skb)->nr_frags = 0;
-		dev_kfree_skb(f->skb);
-	}
-	kfree(d->frames);
+	t = d->targets;
+	e = t + NTARGETS;
+	for (; t<e && *t; t++)
+		freetgt(*t);
 	if (d->bufpool)
 		mempool_destroy(d->bufpool);
 	kfree(d);
diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c
index f9ddfda..f335099 100644
--- a/drivers/block/aoe/aoenet.c
+++ b/drivers/block/aoe/aoenet.c
@@ -133,8 +133,8 @@ aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt,
 		if (n > NECODES)
 			n = 0;
 		if (net_ratelimit())
-			printk(KERN_ERR "aoe: error packet from %d.%d; ecode=%d '%s'\n",
-			       be16_to_cpu(get_unaligned(&h->major)), h->minor,
+			printk(KERN_ERR "aoe: error packet from %d.%d@%s; ecode=%d '%s'\n",
+			       be16_to_cpu(get_unaligned(&h->major)), h->minor, skb->dev->name,
 			       h->err, aoe_errlist[n]);
 		goto exit;
 	}
-- 
1.5.2.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 01/12] bring driver version number to 47
@ 2007-06-26 18:50 Ed L. Cashin
  2007-06-26 18:50 ` [PATCH 03/12] mac_addr: avoid 64-bit arch compiler warnings Ed L. Cashin
                   ` (10 more replies)
  0 siblings, 11 replies; 28+ messages in thread
From: Ed L. Cashin @ 2007-06-26 18:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: Greg K-H, ecashin

These patches were made against kernel 2.6.22-rc4.

Bring driver version number to 47.

Signed-off-by: Ed L. Cashin <ecashin@coraid.com>
---
 drivers/block/aoe/aoe.h |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 1d84668..2ce5ce9 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -1,5 +1,5 @@
 /* Copyright (c) 2006 Coraid, Inc.  See COPYING for GPL terms. */
-#define VERSION "32"
+#define VERSION "47"
 #define AOE_MAJOR 152
 #define DEVICE_NAME "aoe"
 
-- 
1.5.2.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 03/12] mac_addr: avoid 64-bit arch compiler warnings
  2007-06-26 18:50 [PATCH 01/12] bring driver version number to 47 Ed L. Cashin
@ 2007-06-26 18:50 ` Ed L. Cashin
  2007-06-26 18:50 ` [PATCH 02/12] handle multiple network paths to AoE device Ed L. Cashin
                   ` (9 subsequent siblings)
  10 siblings, 0 replies; 28+ messages in thread
From: Ed L. Cashin @ 2007-06-26 18:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: Greg K-H, ecashin

By returning unsigned long long, mac_addr does not generate compiler
warnings on 64-bit architectures.

Signed-off-by: Ed L. Cashin <ecashin@coraid.com>
---
 drivers/block/aoe/aoe.h    |    2 +-
 drivers/block/aoe/aoeblk.c |    3 +--
 drivers/block/aoe/aoecmd.c |   10 +++++-----
 drivers/block/aoe/aoenet.c |    4 ++--
 4 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 069f04c..f085465 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -208,4 +208,4 @@ void aoenet_xmit(struct sk_buff *);
 int is_aoe_netif(struct net_device *ifp);
 int set_aoe_iflist(const char __user *str, size_t size);
 
-u64 mac_addr(char addr[6]);
+unsigned long long mac_addr(char addr[6]);
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index f6773ab..c9cf576 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -32,8 +32,7 @@ static ssize_t aoedisk_show_mac(struct gendisk * disk, char *page)
 
 	if (t == NULL)
 		return snprintf(page, PAGE_SIZE, "none\n");
-	return snprintf(page, PAGE_SIZE, "%012llx\n",
-			(unsigned long long)mac_addr(t->addr));
+	return snprintf(page, PAGE_SIZE, "%012llx\n", mac_addr(t->addr));
 }
 static ssize_t aoedisk_show_netif(struct gendisk * disk, char *page)
 {
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 8a3a973..62ba58c 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -305,7 +305,8 @@ resend(struct aoedev *d, struct aoetgt *t, struct frame *f)
 	snprintf(buf, sizeof buf,
 		"%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%012llx d=%012llx nout=%d\n",
 		"retransmit", d->aoemajor, d->aoeminor, f->tag, jiffies, n,
-		mac_addr(h->src), mac_addr(h->dst), t->nout);
+		mac_addr(h->src),
+		mac_addr(h->dst), t->nout);
 	aoechr_error(buf);
 
 	f->tag = n;
@@ -616,7 +617,7 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
 
 	if (d->ssize != ssize)
 		printk(KERN_INFO "aoe: %012llx e%lu.%lu v%04x has %llu sectors\n",
-			(unsigned long long)mac_addr(t->addr),
+			mac_addr(t->addr),
 			d->aoemajor, d->aoeminor,
 			d->fw_ver, (long long)ssize);
 	d->ssize = ssize;
@@ -710,8 +711,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
 	t = gettgt(d, hin->src);
 	if (t == NULL) {
 		printk(KERN_INFO "aoe: can't find target e%ld.%d:%012llx\n",
-			d->aoemajor, d->aoeminor,
-			(unsigned long long) mac_addr(hin->src));
+			d->aoemajor, d->aoeminor, mac_addr(hin->src));
 		spin_unlock_irqrestore(&d->lock, flags);
 		return;
 	}
@@ -988,7 +988,7 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
 			printk(KERN_INFO
 				"aoe: e%ld.%d: setting %d byte data frames on %s:%012llx\n",
 				d->aoemajor, d->aoeminor, n, ifp->nd->name,
-				(unsigned long long) mac_addr(t->addr));
+				mac_addr(t->addr));
 			ifp->maxbcnt = n;
 		}
 	}
diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c
index f335099..d2c1a47 100644
--- a/drivers/block/aoe/aoenet.c
+++ b/drivers/block/aoe/aoenet.c
@@ -82,7 +82,7 @@ set_aoe_iflist(const char __user *user_str, size_t size)
 	return 0;
 }
 
-u64
+unsigned long long
 mac_addr(char addr[6])
 {
 	__be64 n = 0;
@@ -90,7 +90,7 @@ mac_addr(char addr[6])
 
 	memcpy(p + 2, addr, 6);	/* (sizeof addr != 6) */
 
-	return __be64_to_cpu(n);
+	return (unsigned long long) __be64_to_cpu(n);
 }
 
 void
-- 
1.5.2.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 07/12] use a dynamic pool of sk_buffs to keep up with fast targets
  2007-06-26 18:50 [PATCH 01/12] bring driver version number to 47 Ed L. Cashin
                   ` (2 preceding siblings ...)
  2007-06-26 18:50 ` [PATCH 05/12] eliminate goto and improve readability Ed L. Cashin
@ 2007-06-26 18:50 ` Ed L. Cashin
  2007-07-03  4:36   ` Andrew Morton
  2007-06-26 18:50 ` [PATCH 06/12] user can ask driver to forget previously detected devices Ed L. Cashin
                   ` (6 subsequent siblings)
  10 siblings, 1 reply; 28+ messages in thread
From: Ed L. Cashin @ 2007-06-26 18:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: Greg K-H, ecashin

Use a dynamic pool of sk_buffs to keep up with fast targets.

Signed-off-by: Ed L. Cashin <ecashin@coraid.com>
---
 drivers/block/aoe/aoe.h    |    5 ++
 drivers/block/aoe/aoecmd.c |  129 +++++++++++++++++++++++++++++---------------
 drivers/block/aoe/aoedev.c |   51 +++++++++++++++---
 3 files changed, 134 insertions(+), 51 deletions(-)

diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 7fa86dd..55c2f08 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -98,6 +98,7 @@ enum {
 	MIN_BUFS = 16,
 	NTARGETS = 8,
 	NAOEIFS = 8,
+	NSKBPOOLMAX = 128,
 
 	TIMERTICK = HZ / 10,
 	MINTIMER = HZ >> 2,
@@ -147,6 +148,7 @@ struct aoetgt {
 	u16 useme;
 	ulong lastwadj;		/* last window adjustment */
 int wpkts, rpkts;
+int dataref;
 };
 
 struct aoedev {
@@ -168,6 +170,9 @@ struct aoedev {
 	spinlock_t lock;
 	struct sk_buff *sendq_hd; /* packets needing to be sent, list head */
 	struct sk_buff *sendq_tl;
+	struct sk_buff *skbpool_hd;
+	struct sk_buff *skbpool_tl;
+	int nskbpool;
 	mempool_t *bufpool;	/* for deadlock-free Buf allocation */
 	struct list_head bufq;	/* queue of bios to work on */
 	struct buf *inprocess;	/* the one we're currently working on */
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 62ba58c..89df9de 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -105,43 +105,102 @@ ifrotate(struct aoetgt *t)
 	}
 }
 
+static void
+skb_pool_put(struct aoedev *d, struct sk_buff *skb)
+{
+	if (!d->skbpool_hd)
+		d->skbpool_hd = skb;
+	else
+		d->skbpool_tl->next = skb;
+	d->skbpool_tl = skb;
+}
+
+static struct sk_buff *
+skb_pool_get(struct aoedev *d)
+{
+	struct sk_buff *skb;
+
+	skb = d->skbpool_hd;
+	if (skb)
+	if (atomic_read(&skb_shinfo(skb)->dataref) == 1) {
+		d->skbpool_hd = skb->next;
+		skb->next = NULL;
+		return skb;
+	}
+	if (d->nskbpool < NSKBPOOLMAX)
+	if ((skb = new_skb(ETH_ZLEN))) {
+		d->nskbpool++;
+		return skb;
+	}
+	return NULL;
+}
+
+/* freeframe is where we do our load balancing so it's a little hairy. */
 static struct frame *
 freeframe(struct aoedev *d)
 {
-	struct frame *f, *e;
+	struct frame *f, *e, *rf;
 	struct aoetgt **t;
-	ulong n;
+	struct sk_buff *skb;
 
 	if (d->targets[0] == NULL) {	/* shouldn't happen, but I'm paranoid */
 		printk(KERN_ERR "aoe: NULL TARGETS!\n");
 		return NULL;
 	}
-	t = d->targets;
-	do {
+	t = d->tgt;
+	t++;
+	if (t >= &d->targets[NTARGETS] || !*t)
+		t = d->targets;
+	for (;;) {
+		if ((*t)->nout < (*t)->maxout)
 		if (t != d->htgt)
-		if ((*t)->ifp->nd)
-		if ((*t)->nout < (*t)->maxout) {
-			n = (*t)->nframes;
+		if ((*t)->ifp->nd) {
+			rf = NULL;
 			f = (*t)->frames;
-			e = f + n;
+			e = f + (*t)->nframes;
 			for (; f<e; f++) {
 				if (f->tag != FREETAG)
 					continue;
-				if (atomic_read(&skb_shinfo(f->skb)->dataref) != 1) {
-					n--;
+				skb = f->skb;
+				if (!skb)
+				if (!(f->skb = skb = new_skb(ETH_ZLEN)))
+					continue;
+				if (atomic_read(&skb_shinfo(skb)->dataref) != 1) {
+					if (!rf)
+						rf = f;
 					continue;
 				}
-				skb_shinfo(f->skb)->nr_frags = f->skb->data_len = 0;
-				skb_trim(f->skb, 0);
+gotone:				skb_shinfo(skb)->nr_frags = skb->data_len = 0;
+				skb_trim(skb, 0);
 				d->tgt = t;
 				ifrotate(*t);
 				return f;
 			}
-			if (n == 0)	/* slow polling network card */
+			/* Work can be done, but the network layer is
+			   holding our precious packets.  Try to grab
+			   one from the pool. */
+			f = rf;
+			if (f == NULL) {	/* more paranoia */
+				printk(KERN_ERR "aoe: freeframe: unexpected null rf.\n");
+				d->flags |= DEVFL_KICKME;
+				return NULL;
+			}
+			skb = skb_pool_get(d);
+			if (skb) {
+				skb_pool_put(d, f->skb);
+				f->skb = skb;
+				goto gotone;
+			}
+			(*t)->dataref++;
+			if ((*t)->nout == 0)
 				d->flags |= DEVFL_KICKME;
 		}
+		if (t == d->tgt)	/* we've looped and found nada */
+			break;
 		t++;
-	} while (t < &d->targets[NTARGETS] && *t);
+		if (t >= &d->targets[NTARGETS] || !*t)
+			t = d->targets;
+	}
 	return NULL;
 }
 
@@ -870,44 +929,26 @@ addtgt(struct aoedev *d, char *addr, ulong nframes)
 
 	tt = d->targets;
 	te = tt + NTARGETS;
-	for (; tt<te; tt++) {
-		if (*tt == NULL)
-			break;
-		else if (memcmp((*tt)->addr, addr, 6) > 0) {
-			memmove(tt+1, tt, (void *)te-(void *)(tt+1));
-			break;
-		}
-	}
+	for (; tt<te && *tt; tt++)
+		;
+
 	if (tt == te)
 		return NULL;
 
 	t = kcalloc(1, sizeof *t, GFP_ATOMIC);
+	if (!t)
+		return NULL;
 	f = kcalloc(nframes, sizeof *f, GFP_ATOMIC);
-	switch (!t || !f) {
-	case 0:
-		t->nframes = nframes;
-		t->frames = f;
-		e = f + nframes;
-		for (; f<e; f++) {
-			f->tag = FREETAG;
-			f->skb = new_skb(ETH_ZLEN);
-			if (!f->skb)
-				break;
-		}
-		if (f == e)
-			break;
-		while (f > t->frames) {
-			f--;
-			dev_kfree_skb(f->skb);
-		}
-	default:
-		if (f)
-			kfree(f);
-		if (t)
-			kfree(t);
+	if (!f) {
+		kfree(t);
 		return NULL;
 	}
 
+	t->nframes = nframes;
+	t->frames = f;
+	e = f + nframes;
+	for (; f<e; f++)
+		f->tag = FREETAG;
 	memcpy(t->addr, addr, sizeof t->addr);
 	t->ifp = t->ifs;
 	t->maxout = t->nframes;
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index 8659796..cf4e58d 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -7,11 +7,13 @@
 #include <linux/hdreg.h>
 #include <linux/blkdev.h>
 #include <linux/netdevice.h>
+#include <linux/delay.h>
 #include "aoe.h"
 
 static void dummy_timer(ulong);
 static void aoedev_freedev(struct aoedev *);
-static void freetgt(struct aoetgt *t);
+static void freetgt(struct aoedev *d, struct aoetgt *t);
+static void skbpoolfree(struct aoedev *d);
 
 static struct aoedev *devlist;
 static spinlock_t devlist_lock;
@@ -125,9 +127,10 @@ aoedev_freedev(struct aoedev *d)
 	t = d->targets;
 	e = t + NTARGETS;
 	for (; t<e && *t; t++)
-		freetgt(*t);
+		freetgt(d, *t);
 	if (d->bufpool)
 		mempool_destroy(d->bufpool);
+	skbpoolfree(d);
 	kfree(d);
 }
 
@@ -176,6 +179,42 @@ aoedev_flush(const char __user *str, size_t cnt)
 	return 0;
 }
 
+/* I'm not really sure that this is a realistic problem, but if the
+network driver goes gonzo let's just leak memory after complaining. */
+static void
+skbfree(struct sk_buff *skb)
+{
+	enum { Sms= 100, Tms= 3*1000};
+	int i = Tms / Sms;
+
+	if (skb == NULL)
+		return;
+	while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
+		msleep_interruptible(Sms);
+	if (i <= 0) {
+		printk(KERN_ERR
+			"aoe: %s holds ref: cannot free skb -- memory leaked.\n",
+			skb->dev ? skb->dev->name : "netif");
+		return;
+	}
+	skb_shinfo(skb)->nr_frags = skb->data_len = 0;
+	skb_trim(skb, 0);
+	dev_kfree_skb(skb);
+}
+
+static void
+skbpoolfree(struct aoedev *d)
+{
+	struct sk_buff *skb;
+
+	while ((skb = d->skbpool_hd)) {
+		d->skbpool_hd = skb->next;
+		skb->next = NULL;
+		skbfree(skb);
+	}
+	d->skbpool_tl = NULL;
+}
+
 /* find it or malloc it */
 struct aoedev *
 aoedev_by_sysminor_m(ulong sysminor)
@@ -214,16 +253,14 @@ aoedev_by_sysminor_m(ulong sysminor)
 }
 
 static void
-freetgt(struct aoetgt *t)
+freetgt(struct aoedev *d, struct aoetgt *t)
 {
 	struct frame *f, *e;
 
 	f = t->frames;
 	e = f + t->nframes;
-	for (; f<e; f++) {
-		skb_shinfo(f->skb)->nr_frags = 0;
-		dev_kfree_skb(f->skb);
-	}
+	for (; f<e; f++)
+		skbfree(f->skb);
 	kfree(t->frames);
 	kfree(t);
 }
-- 
1.5.2.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 06/12] user can ask driver to forget previously detected devices
  2007-06-26 18:50 [PATCH 01/12] bring driver version number to 47 Ed L. Cashin
                   ` (3 preceding siblings ...)
  2007-06-26 18:50 ` [PATCH 07/12] use a dynamic pool of sk_buffs to keep up with fast targets Ed L. Cashin
@ 2007-06-26 18:50 ` Ed L. Cashin
  2007-06-26 18:50 ` [PATCH 04/12] clean up udev configuration example Ed L. Cashin
                   ` (5 subsequent siblings)
  10 siblings, 0 replies; 28+ messages in thread
From: Ed L. Cashin @ 2007-06-26 18:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: Greg K-H, ecashin

When an AoE device is detected, the kernel is informed, and a new
block device is created.  If the device is unused, the block device
corresponding to remote device that is no longer available may be
removed from the system by telling the aoe driver to "flush" its list
of devices.

Signed-off-by: Ed L. Cashin <ecashin@coraid.com>
---
 Documentation/aoe/mkdevs.sh |    2 +
 Documentation/aoe/udev.txt  |    1 +
 drivers/block/aoe/aoe.h     |    1 +
 drivers/block/aoe/aoechr.c  |    5 ++
 drivers/block/aoe/aoedev.c  |   87 +++++++++++++++++++++++++++++++++---------
 5 files changed, 77 insertions(+), 19 deletions(-)

diff --git a/Documentation/aoe/mkdevs.sh b/Documentation/aoe/mkdevs.sh
index 97374aa..44c0ab7 100644
--- a/Documentation/aoe/mkdevs.sh
+++ b/Documentation/aoe/mkdevs.sh
@@ -29,6 +29,8 @@ rm -f $dir/interfaces
 mknod -m 0200 $dir/interfaces c $MAJOR 4
 rm -f $dir/revalidate
 mknod -m 0200 $dir/revalidate c $MAJOR 5
+rm -f $dir/flush
+mknod -m 0200 $dir/flush c $MAJOR 6
 
 export n_partitions
 mkshelf=`echo $0 | sed 's!mkdevs!mkshelf!'`
diff --git a/Documentation/aoe/udev.txt b/Documentation/aoe/udev.txt
index 17e76c4..8686e78 100644
--- a/Documentation/aoe/udev.txt
+++ b/Documentation/aoe/udev.txt
@@ -20,6 +20,7 @@ SUBSYSTEM=="aoe", KERNEL=="discover",	NAME="etherd/%k", GROUP="disk", MODE="0220
 SUBSYSTEM=="aoe", KERNEL=="err",	NAME="etherd/%k", GROUP="disk", MODE="0440"
 SUBSYSTEM=="aoe", KERNEL=="interfaces",	NAME="etherd/%k", GROUP="disk", MODE="0220"
 SUBSYSTEM=="aoe", KERNEL=="revalidate",	NAME="etherd/%k", GROUP="disk", MODE="0220"
+SUBSYSTEM=="aoe", KERNEL=="flush",	NAME="etherd/%k", GROUP="disk", MODE="0220"
 
 # aoe block devices     
 KERNEL=="etherd*",       NAME="%k", GROUP="disk"
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index f085465..7fa86dd 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -201,6 +201,7 @@ struct aoedev *aoedev_by_aoeaddr(int maj, int min);
 struct aoedev *aoedev_by_sysminor_m(ulong sysminor);
 void aoedev_downdev(struct aoedev *d);
 int aoedev_isbusy(struct aoedev *d);
+int aoedev_flush(const char __user *str, size_t size);
 
 int aoenet_init(void);
 void aoenet_exit(void);
diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
index 511f995..10b38a7 100644
--- a/drivers/block/aoe/aoechr.c
+++ b/drivers/block/aoe/aoechr.c
@@ -15,6 +15,7 @@ enum {
 	MINOR_DISCOVER,
 	MINOR_INTERFACES,
 	MINOR_REVALIDATE,
+	MINOR_FLUSH,
 	MSGSZ = 2048,
 	NMSG = 100,		/* message backlog to retain */
 };
@@ -43,6 +44,7 @@ static struct aoe_chardev chardevs[] = {
 	{ MINOR_DISCOVER, "discover" },
 	{ MINOR_INTERFACES, "interfaces" },
 	{ MINOR_REVALIDATE, "revalidate" },
+	{ MINOR_FLUSH, "flush" },
 };
 
 static int
@@ -155,6 +157,9 @@ aoechr_write(struct file *filp, const char __user *buf, size_t cnt, loff_t *offp
 		break;
 	case MINOR_REVALIDATE:
 		ret = revalidate(buf, cnt);
+		break;
+	case MINOR_FLUSH:
+		ret = aoedev_flush(buf, cnt);
 	}
 	if (ret == 0)
 		ret = cnt;
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index 04c75b4..8659796 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -9,6 +9,10 @@
 #include <linux/netdevice.h>
 #include "aoe.h"
 
+static void dummy_timer(ulong);
+static void aoedev_freedev(struct aoedev *);
+static void freetgt(struct aoetgt *t);
+
 static struct aoedev *devlist;
 static spinlock_t devlist_lock;
 
@@ -108,6 +112,70 @@ aoedev_downdev(struct aoedev *d)
 	d->flags &= ~DEVFL_UP;
 }
 
+static void
+aoedev_freedev(struct aoedev *d)
+{
+	struct aoetgt **t, **e;
+
+	if (d->gd) {
+		aoedisk_rm_sysfs(d);
+		del_gendisk(d->gd);
+		put_disk(d->gd);
+	}
+	t = d->targets;
+	e = t + NTARGETS;
+	for (; t<e && *t; t++)
+		freetgt(*t);
+	if (d->bufpool)
+		mempool_destroy(d->bufpool);
+	kfree(d);
+}
+
+int
+aoedev_flush(const char __user *str, size_t cnt)
+{
+	ulong flags;
+	struct aoedev *d, **dd;
+	struct aoedev *rmd = NULL;
+	char buf[16];
+	int all = 0;
+
+	if (cnt >= 3) {
+		if (cnt > sizeof buf)
+			cnt = sizeof buf;
+		if (copy_from_user(buf, str, cnt))
+			return -EFAULT;
+		all = !strncmp(buf, "all", 3);
+	}
+
+	flush_scheduled_work();
+	spin_lock_irqsave(&devlist_lock, flags);
+	dd = &devlist;
+	while ((d=*dd)) {
+		spin_lock(&d->lock);
+		if ((!all && (d->flags & DEVFL_UP))
+		|| (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
+		|| d->nopen) {
+			spin_unlock(&d->lock);
+			dd = &d->next;
+			continue;
+		}
+		*dd = d->next;
+		aoedev_downdev(d);
+		d->flags |= DEVFL_TKILL;
+		spin_unlock(&d->lock);
+		d->next = rmd;
+		rmd = d;
+	}
+	spin_unlock_irqrestore(&devlist_lock, flags);
+	while ((d=rmd)) {
+		rmd = d->next;
+		del_timer_sync(&d->timer);
+		aoedev_freedev(d);	// must be able to sleep
+	}
+	return 0;
+}
+
 /* find it or malloc it */
 struct aoedev *
 aoedev_by_sysminor_m(ulong sysminor)
@@ -160,25 +228,6 @@ freetgt(struct aoetgt *t)
 	kfree(t);
 }
 
-static void
-aoedev_freedev(struct aoedev *d)
-{
-	struct aoetgt **t, **e;
-
-	if (d->gd) {
-		aoedisk_rm_sysfs(d);
-		del_gendisk(d->gd);
-		put_disk(d->gd);
-	}
-	t = d->targets;
-	e = t + NTARGETS;
-	for (; t<e && *t; t++)
-		freetgt(*t);
-	if (d->bufpool)
-		mempool_destroy(d->bufpool);
-	kfree(d);
-}
-
 void
 aoedev_exit(void)
 {
-- 
1.5.2.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 05/12] eliminate goto and improve readability
  2007-06-26 18:50 [PATCH 01/12] bring driver version number to 47 Ed L. Cashin
  2007-06-26 18:50 ` [PATCH 03/12] mac_addr: avoid 64-bit arch compiler warnings Ed L. Cashin
  2007-06-26 18:50 ` [PATCH 02/12] handle multiple network paths to AoE device Ed L. Cashin
@ 2007-06-26 18:50 ` Ed L. Cashin
  2007-06-26 18:50 ` [PATCH 07/12] use a dynamic pool of sk_buffs to keep up with fast targets Ed L. Cashin
                   ` (7 subsequent siblings)
  10 siblings, 0 replies; 28+ messages in thread
From: Ed L. Cashin @ 2007-06-26 18:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: Greg K-H, ecashin

Adam Richter suggested eliminating this goto.

Signed-off-by: Ed L. Cashin <ecashin@coraid.com>
---
 drivers/block/aoe/aoechr.c |   69 +++++++++++++++++++++----------------------
 1 files changed, 34 insertions(+), 35 deletions(-)

diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
index 9026c44..511f995 100644
--- a/drivers/block/aoe/aoechr.c
+++ b/drivers/block/aoe/aoechr.c
@@ -191,52 +191,51 @@ aoechr_read(struct file *filp, char __user *buf, size_t cnt, loff_t *off)
 	ulong flags;
 
 	n = (unsigned long) filp->private_data;
-	switch (n) {
-	case MINOR_ERR:
-		spin_lock_irqsave(&emsgs_lock, flags);
-loop:
-		em = emsgs + emsgs_head_idx;
-		if ((em->flags & EMFL_VALID) == 0) {
-			if (filp->f_flags & O_NDELAY) {
-				spin_unlock_irqrestore(&emsgs_lock, flags);
-				return -EAGAIN;
-			}
-			nblocked_emsgs_readers++;
+	if (n != MINOR_ERR)
+		return -EFAULT;
+
+	spin_lock_irqsave(&emsgs_lock, flags);
 
+	for (;;) {
+		em = emsgs + emsgs_head_idx;
+		if ((em->flags & EMFL_VALID) != 0)
+			break;
+		if (filp->f_flags & O_NDELAY) {
 			spin_unlock_irqrestore(&emsgs_lock, flags);
+			return -EAGAIN;
+		}
+		nblocked_emsgs_readers++;
+
+		spin_unlock_irqrestore(&emsgs_lock, flags);
 
-			n = down_interruptible(&emsgs_sema);
+		n = down_interruptible(&emsgs_sema);
 
-			spin_lock_irqsave(&emsgs_lock, flags);
+		spin_lock_irqsave(&emsgs_lock, flags);
 
-			nblocked_emsgs_readers--;
+		nblocked_emsgs_readers--;
 
-			if (n) {
-				spin_unlock_irqrestore(&emsgs_lock, flags);
-				return -ERESTARTSYS;
-			}
-			goto loop;
-		}
-		if (em->len > cnt) {
+		if (n) {
 			spin_unlock_irqrestore(&emsgs_lock, flags);
-			return -EAGAIN;
+			return -ERESTARTSYS;
 		}
-		mp = em->msg;
-		len = em->len;
-		em->msg = NULL;
-		em->flags &= ~EMFL_VALID;
+	}
+	if (em->len > cnt) {
+		spin_unlock_irqrestore(&emsgs_lock, flags);
+		return -EAGAIN;
+	}
+	mp = em->msg;
+	len = em->len;
+	em->msg = NULL;
+	em->flags &= ~EMFL_VALID;
 
-		emsgs_head_idx++;
-		emsgs_head_idx %= ARRAY_SIZE(emsgs);
+	emsgs_head_idx++;
+	emsgs_head_idx %= ARRAY_SIZE(emsgs);
 
-		spin_unlock_irqrestore(&emsgs_lock, flags);
+	spin_unlock_irqrestore(&emsgs_lock, flags);
 
-		n = copy_to_user(buf, mp, len);
-		kfree(mp);
-		return n == 0 ? len : -EFAULT;
-	default:
-		return -EFAULT;
-	}
+	n = copy_to_user(buf, mp, len);
+	kfree(mp);
+	return n == 0 ? len : -EFAULT;
 }
 
 static const struct file_operations aoe_fops = {
-- 
1.5.2.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 04/12] clean up udev configuration example
  2007-06-26 18:50 [PATCH 01/12] bring driver version number to 47 Ed L. Cashin
                   ` (4 preceding siblings ...)
  2007-06-26 18:50 ` [PATCH 06/12] user can ask driver to forget previously detected devices Ed L. Cashin
@ 2007-06-26 18:50 ` Ed L. Cashin
  2007-06-26 18:50 ` [PATCH 12/12] the aoeminor doesn't need a long format Ed L. Cashin
                   ` (4 subsequent siblings)
  10 siblings, 0 replies; 28+ messages in thread
From: Ed L. Cashin @ 2007-06-26 18:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: Greg K-H, ecashin

This patch adds a known default location for the udev configuration
file and uses the more recent "==" syntax for SUBSYSTEM and KERNEL.

Signed-off-by: Ed L. Cashin <ecashin@coraid.com>
---
 Documentation/aoe/udev-install.sh |    5 ++++-
 Documentation/aoe/udev.txt        |   15 ++++++++-------
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/Documentation/aoe/udev-install.sh b/Documentation/aoe/udev-install.sh
index 6449911..15e86f5 100644
--- a/Documentation/aoe/udev-install.sh
+++ b/Documentation/aoe/udev-install.sh
@@ -23,7 +23,10 @@ fi
 # /etc/udev/rules.d
 #
 rules_d="`sed -n '/^udev_rules=/{ s!udev_rules=!!; s!\"!!g; p; }' $conf`"
-if test -z "$rules_d" || test ! -d "$rules_d"; then
+if test -z "$rules_d" ; then
+	rules_d=/etc/udev/rules.d
+fi
+if test ! -d "$rules_d"; then
 	echo "$me Error: cannot find udev rules directory" 1>&2
 	exit 1
 fi
diff --git a/Documentation/aoe/udev.txt b/Documentation/aoe/udev.txt
index a7ed1dc..17e76c4 100644
--- a/Documentation/aoe/udev.txt
+++ b/Documentation/aoe/udev.txt
@@ -1,6 +1,7 @@
 # These rules tell udev what device nodes to create for aoe support.
-# They may be installed along the following lines (adjusted to what
-# you see on your system).
+# They may be installed along the following lines.  Check the section
+# 8 udev manpage to see whether your udev supports SUBSYSTEM, and
+# whether it uses one or two equal signs for SUBSYSTEM and KERNEL.
 # 
 #   ecashin@makki ~$ su
 #   Password:
@@ -15,10 +16,10 @@
 #  
 
 # aoe char devices
-SUBSYSTEM="aoe", KERNEL="discover",	NAME="etherd/%k", GROUP="disk", MODE="0220"
-SUBSYSTEM="aoe", KERNEL="err",		NAME="etherd/%k", GROUP="disk", MODE="0440"
-SUBSYSTEM="aoe", KERNEL="interfaces",	NAME="etherd/%k", GROUP="disk", MODE="0220"
-SUBSYSTEM="aoe", KERNEL="revalidate",	NAME="etherd/%k", GROUP="disk", MODE="0220"
+SUBSYSTEM=="aoe", KERNEL=="discover",	NAME="etherd/%k", GROUP="disk", MODE="0220"
+SUBSYSTEM=="aoe", KERNEL=="err",	NAME="etherd/%k", GROUP="disk", MODE="0440"
+SUBSYSTEM=="aoe", KERNEL=="interfaces",	NAME="etherd/%k", GROUP="disk", MODE="0220"
+SUBSYSTEM=="aoe", KERNEL=="revalidate",	NAME="etherd/%k", GROUP="disk", MODE="0220"
 
 # aoe block devices     
-KERNEL="etherd*",       NAME="%k", GROUP="disk"
+KERNEL=="etherd*",       NAME="%k", GROUP="disk"
-- 
1.5.2.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 12/12] the aoeminor doesn't need a long format
  2007-06-26 18:50 [PATCH 01/12] bring driver version number to 47 Ed L. Cashin
                   ` (5 preceding siblings ...)
  2007-06-26 18:50 ` [PATCH 04/12] clean up udev configuration example Ed L. Cashin
@ 2007-06-26 18:50 ` Ed L. Cashin
  2007-06-26 19:51   ` Randy Dunlap
  2007-06-26 18:50 ` [PATCH 10/12] add module parameter for users who need more outstanding I/O Ed L. Cashin
                   ` (3 subsequent siblings)
  10 siblings, 1 reply; 28+ messages in thread
From: Ed L. Cashin @ 2007-06-26 18:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: Greg K-H, ecashin

The aoedev aoeminor member doesn't need a long format.

Signed-off-by: Ed L. Cashin <ecashin@coraid.com>
---
 drivers/block/aoe/aoeblk.c |    6 +++---
 drivers/block/aoe/aoecmd.c |    4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index ccadd9a..e216fe0 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -203,7 +203,7 @@ aoeblk_make_request(request_queue_t *q, struct bio *bio)
 	spin_lock_irqsave(&d->lock, flags);
 
 	if ((d->flags & DEVFL_UP) == 0) {
-		printk(KERN_INFO "aoe: device %ld.%ld is not up\n",
+		printk(KERN_INFO "aoe: device %ld.%d is not up\n",
 			d->aoemajor, d->aoeminor);
 		spin_unlock_irqrestore(&d->lock, flags);
 		mempool_free(buf, d->bufpool);
@@ -256,7 +256,7 @@ aoeblk_gdalloc(void *vp)
 
 	gd = alloc_disk(AOE_PARTITIONS);
 	if (gd == NULL) {
-		printk(KERN_ERR "aoe: cannot allocate disk structure for %ld.%ld\n",
+		printk(KERN_ERR "aoe: cannot allocate disk structure for %ld.%d\n",
 			d->aoemajor, d->aoeminor);
 		spin_lock_irqsave(&d->lock, flags);
 		d->flags &= ~DEVFL_GDALLOC;
@@ -266,7 +266,7 @@ aoeblk_gdalloc(void *vp)
 
 	d->bufpool = mempool_create_slab_pool(MIN_BUFS, buf_pool_cache);
 	if (d->bufpool == NULL) {
-		printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%ld\n",
+		printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%d\n",
 			d->aoemajor, d->aoeminor);
 		put_disk(gd);
 		spin_lock_irqsave(&d->lock, flags);
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 9de0024..dfb1184 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -680,7 +680,7 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
 	}
 
 	if (d->ssize != ssize)
-		printk(KERN_INFO "aoe: %012llx e%lu.%lu v%04x has %llu sectors\n",
+		printk(KERN_INFO "aoe: %012llx e%ld.%d v%04x has %llu sectors\n",
 			mac_addr(t->addr),
 			d->aoemajor, d->aoeminor,
 			d->fw_ver, (long long)ssize);
@@ -805,7 +805,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
 
 	if (ahin->cmdstat & 0xa9) {	/* these bits cleared on success */
 		printk(KERN_ERR
-			"aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%ld\n",
+			"aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n",
 			ahout->cmdstat, ahin->cmdstat,
 			d->aoemajor, d->aoeminor);
 		if (buf)
-- 
1.5.2.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 11/12] remove extra space in prototypes for consistency
  2007-06-26 18:50 [PATCH 01/12] bring driver version number to 47 Ed L. Cashin
                   ` (7 preceding siblings ...)
  2007-06-26 18:50 ` [PATCH 10/12] add module parameter for users who need more outstanding I/O Ed L. Cashin
@ 2007-06-26 18:50 ` Ed L. Cashin
  2007-06-26 18:50 ` [PATCH 09/12] remove race between use and initialization of locks Ed L. Cashin
  2007-06-26 18:50 ` [PATCH 08/12] only schedule work once Ed L. Cashin
  10 siblings, 0 replies; 28+ messages in thread
From: Ed L. Cashin @ 2007-06-26 18:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: Greg K-H, ecashin

Remove extra space in prototypes for consistency.

Signed-off-by: Ed L. Cashin <ecashin@coraid.com>
---
 drivers/block/aoe/aoeblk.c |    8 ++++----
 1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index c9cf576..ccadd9a 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -14,7 +14,7 @@
 
 static struct kmem_cache *buf_pool_cache;
 
-static ssize_t aoedisk_show_state(struct gendisk * disk, char *page)
+static ssize_t aoedisk_show_state(struct gendisk *disk, char *page)
 {
 	struct aoedev *d = disk->private_data;
 
@@ -25,7 +25,7 @@ static ssize_t aoedisk_show_state(struct gendisk * disk, char *page)
 			(d->nopen && !(d->flags & DEVFL_UP)) ? ",closewait" : "");
 	/* I'd rather see nopen exported so we can ditch closewait */
 }
-static ssize_t aoedisk_show_mac(struct gendisk * disk, char *page)
+static ssize_t aoedisk_show_mac(struct gendisk *disk, char *page)
 {
 	struct aoedev *d = disk->private_data;
 	struct aoetgt *t = d->targets[0];
@@ -34,7 +34,7 @@ static ssize_t aoedisk_show_mac(struct gendisk * disk, char *page)
 		return snprintf(page, PAGE_SIZE, "none\n");
 	return snprintf(page, PAGE_SIZE, "%012llx\n", mac_addr(t->addr));
 }
-static ssize_t aoedisk_show_netif(struct gendisk * disk, char *page)
+static ssize_t aoedisk_show_netif(struct gendisk *disk, char *page)
 {
 	struct aoedev *d = disk->private_data;
 	struct net_device *nds[8], **nd, **nnd, **ne;
@@ -71,7 +71,7 @@ static ssize_t aoedisk_show_netif(struct gendisk * disk, char *page)
 	return p-page;
 }
 /* firmware version */
-static ssize_t aoedisk_show_fwver(struct gendisk * disk, char *page)
+static ssize_t aoedisk_show_fwver(struct gendisk *disk, char *page)
 {
 	struct aoedev *d = disk->private_data;
 
-- 
1.5.2.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 09/12] remove race between use and initialization of locks
  2007-06-26 18:50 [PATCH 01/12] bring driver version number to 47 Ed L. Cashin
                   ` (8 preceding siblings ...)
  2007-06-26 18:50 ` [PATCH 11/12] remove extra space in prototypes for consistency Ed L. Cashin
@ 2007-06-26 18:50 ` Ed L. Cashin
  2007-07-03  4:38   ` Andrew Morton
  2007-06-26 18:50 ` [PATCH 08/12] only schedule work once Ed L. Cashin
  10 siblings, 1 reply; 28+ messages in thread
From: Ed L. Cashin @ 2007-06-26 18:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: Greg K-H, ecashin

This change was originally submitted by Alexey Dobriyan in an email
with ...

  Message-ID: <20070325190221.GA5308@martell.zuzino.mipt.ru>

and the comment,

  Some drivers do register_chrdev() before lock or semaphore used in
  corresponding file_operations is initialized.

Andrew Morton commented that these locks should be initialized at
compile time, but Alexey Debriyan pointed out that the Documentation
tells us to use dynamic initialization whenever possible, and then the
discussion petered out.

  http://preview.tinyurl.com/2pxq6p

I believe we made these locks dynamic because of the notice in
Documentation/spinlocks.txt, which says that static initializers are
deprecated:

  UPDATE March 21 2005 Amit Gud <gud@eth.net>

  Macros SPIN_LOCK_UNLOCKED and RW_LOCK_UNLOCKED are deprecated and will be
  removed soon. So for any new code dynamic initialization should be used:
...

In any case, the patch below makes the code correct and in keeping
with the existing documentation.  If the existing docs are wrong, I'd
be happy to follow up with a patch that corrects them and makes these
aoechr.c locks static.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Ed L. Cashin <ecashin@coraid.com>
---
 drivers/block/aoe/aoechr.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
index 10b38a7..2b4f873 100644
--- a/drivers/block/aoe/aoechr.c
+++ b/drivers/block/aoe/aoechr.c
@@ -256,13 +256,13 @@ aoechr_init(void)
 {
 	int n, i;
 
+	sema_init(&emsgs_sema, 0);
+	spin_lock_init(&emsgs_lock);
 	n = register_chrdev(AOE_MAJOR, "aoechr", &aoe_fops);
 	if (n < 0) { 
 		printk(KERN_ERR "aoe: can't register char device\n");
 		return n;
 	}
-	sema_init(&emsgs_sema, 0);
-	spin_lock_init(&emsgs_lock);
 	aoe_class = class_create(THIS_MODULE, "aoe");
 	if (IS_ERR(aoe_class)) {
 		unregister_chrdev(AOE_MAJOR, "aoechr");
-- 
1.5.2.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 08/12] only schedule work once
  2007-06-26 18:50 [PATCH 01/12] bring driver version number to 47 Ed L. Cashin
                   ` (9 preceding siblings ...)
  2007-06-26 18:50 ` [PATCH 09/12] remove race between use and initialization of locks Ed L. Cashin
@ 2007-06-26 18:50 ` Ed L. Cashin
  2007-07-03  4:37   ` Andrew Morton
  10 siblings, 1 reply; 28+ messages in thread
From: Ed L. Cashin @ 2007-06-26 18:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: Greg K-H, ecashin

Only schedule work once.

Signed-off-by: Ed L. Cashin <ecashin@coraid.com>
---
 drivers/block/aoe/aoecmd.c |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 89df9de..8d6540b 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -681,6 +681,8 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
 			d->fw_ver, (long long)ssize);
 	d->ssize = ssize;
 	d->geo.start = 0;
+	if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
+		return;
 	if (d->gd != NULL) {
 		d->gd->capacity = ssize;
 		d->flags |= DEVFL_NEWSIZE;
-- 
1.5.2.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 10/12] add module parameter for users who need more outstanding I/O
  2007-06-26 18:50 [PATCH 01/12] bring driver version number to 47 Ed L. Cashin
                   ` (6 preceding siblings ...)
  2007-06-26 18:50 ` [PATCH 12/12] the aoeminor doesn't need a long format Ed L. Cashin
@ 2007-06-26 18:50 ` Ed L. Cashin
  2007-07-03  4:41   ` Andrew Morton
  2007-06-26 18:50 ` [PATCH 11/12] remove extra space in prototypes for consistency Ed L. Cashin
                   ` (2 subsequent siblings)
  10 siblings, 1 reply; 28+ messages in thread
From: Ed L. Cashin @ 2007-06-26 18:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: Greg K-H, ecashin

Add module parameter for users who need more outstanding I/O.

Signed-off-by: Ed L. Cashin <ecashin@coraid.com>
---
 drivers/block/aoe/aoecmd.c |   10 +++++++---
 1 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 8d6540b..9de0024 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -17,6 +17,11 @@ static int aoe_deadsecs = 60 * 3;
 module_param(aoe_deadsecs, int, 0644);
 MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");
 
+static int aoe_maxout = 16;
+module_param(aoe_maxout, int, 0644);
+MODULE_PARM_DESC(aoe_maxout,
+	"Only aoe_maxout outstanding packets for every MAC on eX.Y.");
+
 static struct sk_buff *
 new_skb(ulong len)
 {
@@ -967,7 +972,6 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
 	struct aoeif *ifp;
 	ulong flags, sysminor, aoemajor;
 	struct sk_buff *sl;
-	enum { MAXFRAMES = 16 };
 	u16 n;
 
 	h = aoe_hdr(skb);
@@ -992,8 +996,8 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
 	}
 
 	n = be16_to_cpu(ch->bufcnt);
-	if (n > MAXFRAMES)	/* keep it reasonable */
-		n = MAXFRAMES;
+	if (n > aoe_maxout)	/* keep it reasonable */
+		n = aoe_maxout;
 
 	d = aoedev_by_sysminor_m(sysminor);
 	if (d == NULL) {
-- 
1.5.2.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 12/12] the aoeminor doesn't need a long format
  2007-06-26 18:50 ` [PATCH 12/12] the aoeminor doesn't need a long format Ed L. Cashin
@ 2007-06-26 19:51   ` Randy Dunlap
  2007-06-26 19:59     ` Ed L. Cashin
  0 siblings, 1 reply; 28+ messages in thread
From: Randy Dunlap @ 2007-06-26 19:51 UTC (permalink / raw)
  To: Ed L. Cashin; +Cc: linux-kernel, Greg K-H

On Tue, 26 Jun 2007 14:50:12 -0400 Ed L. Cashin wrote:

> The aoedev aoeminor member doesn't need a long format.

Was there a patch that changed aoeminor to an int?
Last I see is:
	ulong aoeminor;
in linux-2.6.22-rc6/drivers/block/aoe/aoe.h.

If it's still ulong, you shouldn't change the printk format.

> Signed-off-by: Ed L. Cashin <ecashin@coraid.com>
> ---
>  drivers/block/aoe/aoeblk.c |    6 +++---
>  drivers/block/aoe/aoecmd.c |    4 ++--
>  2 files changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
> index ccadd9a..e216fe0 100644
> --- a/drivers/block/aoe/aoeblk.c
> +++ b/drivers/block/aoe/aoeblk.c
> @@ -203,7 +203,7 @@ aoeblk_make_request(request_queue_t *q, struct bio *bio)
>  	spin_lock_irqsave(&d->lock, flags);
>  
>  	if ((d->flags & DEVFL_UP) == 0) {
> -		printk(KERN_INFO "aoe: device %ld.%ld is not up\n",
> +		printk(KERN_INFO "aoe: device %ld.%d is not up\n",
>  			d->aoemajor, d->aoeminor);
>  		spin_unlock_irqrestore(&d->lock, flags);
>  		mempool_free(buf, d->bufpool);
> @@ -256,7 +256,7 @@ aoeblk_gdalloc(void *vp)
>  
>  	gd = alloc_disk(AOE_PARTITIONS);
>  	if (gd == NULL) {
> -		printk(KERN_ERR "aoe: cannot allocate disk structure for %ld.%ld\n",
> +		printk(KERN_ERR "aoe: cannot allocate disk structure for %ld.%d\n",
>  			d->aoemajor, d->aoeminor);
>  		spin_lock_irqsave(&d->lock, flags);
>  		d->flags &= ~DEVFL_GDALLOC;
> @@ -266,7 +266,7 @@ aoeblk_gdalloc(void *vp)
>  
>  	d->bufpool = mempool_create_slab_pool(MIN_BUFS, buf_pool_cache);
>  	if (d->bufpool == NULL) {
> -		printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%ld\n",
> +		printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%d\n",
>  			d->aoemajor, d->aoeminor);
>  		put_disk(gd);
>  		spin_lock_irqsave(&d->lock, flags);
> diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
> index 9de0024..dfb1184 100644
> --- a/drivers/block/aoe/aoecmd.c
> +++ b/drivers/block/aoe/aoecmd.c
> @@ -680,7 +680,7 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
>  	}
>  
>  	if (d->ssize != ssize)
> -		printk(KERN_INFO "aoe: %012llx e%lu.%lu v%04x has %llu sectors\n",
> +		printk(KERN_INFO "aoe: %012llx e%ld.%d v%04x has %llu sectors\n",
>  			mac_addr(t->addr),
>  			d->aoemajor, d->aoeminor,
>  			d->fw_ver, (long long)ssize);
> @@ -805,7 +805,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
>  
>  	if (ahin->cmdstat & 0xa9) {	/* these bits cleared on success */
>  		printk(KERN_ERR
> -			"aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%ld\n",
> +			"aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n",
>  			ahout->cmdstat, ahin->cmdstat,
>  			d->aoemajor, d->aoeminor);
>  		if (buf)
> -- 

---
~Randy
*** Remember to use Documentation/SubmitChecklist when testing your code ***

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 12/12] the aoeminor doesn't need a long format
  2007-06-26 19:51   ` Randy Dunlap
@ 2007-06-26 19:59     ` Ed L. Cashin
  0 siblings, 0 replies; 28+ messages in thread
From: Ed L. Cashin @ 2007-06-26 19:59 UTC (permalink / raw)
  To: Randy Dunlap; +Cc: linux-kernel, Greg K-H

On Tue, Jun 26, 2007 at 12:51:07PM -0700, Randy Dunlap wrote:
> On Tue, 26 Jun 2007 14:50:12 -0400 Ed L. Cashin wrote:
> 
> > The aoedev aoeminor member doesn't need a long format.
> 
> Was there a patch that changed aoeminor to an int?
> Last I see is:
> 	ulong aoeminor;
> in linux-2.6.22-rc6/drivers/block/aoe/aoe.h.
> 
> If it's still ulong, you shouldn't change the printk format.

Yes, thanks for checking.  Patch 2 of 12 did change it to a u16.

-- 
  Ed L Cashin <ecashin@coraid.com>

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 02/12] handle multiple network paths to AoE device
  2007-06-26 18:50 ` [PATCH 02/12] handle multiple network paths to AoE device Ed L. Cashin
@ 2007-07-03  4:29   ` Andrew Morton
  2007-07-11 14:46     ` Ed L. Cashin
  2007-07-16 22:17     ` [PATCH] stacked ifs (was Re: [PATCH 02/12] handle multiple network paths to AoE device) Ed L. Cashin
  0 siblings, 2 replies; 28+ messages in thread
From: Andrew Morton @ 2007-07-03  4:29 UTC (permalink / raw)
  To: Ed L. Cashin; +Cc: linux-kernel, Greg K-H

On Tue, 26 Jun 2007 14:50:10 -0400 "Ed L. Cashin" <ecashin@coraid.com> wrote:

> Handle multiple network paths to AoE device.
>
> ...
>
> 
>  	struct buf *inprocess;	/* the one we're currently working on */
> -	ushort lostjumbo;
> -	ushort nframes;		/* number of frames below */
> -	struct frame *frames;
> +	struct aoetgt *targets[NTARGETS];
> +	struct aoetgt **tgt;	/* target in use when working */
> +	struct aoetgt **htgt;	/* target needing rexmit assistance */
> +//int ios[64];
>  };

whats that?
  
>  static ssize_t aoedisk_show_netif(struct gendisk * disk, char *page)
>  {
>  	struct aoedev *d = disk->private_data;
> +	struct net_device *nds[8], **nd, **nnd, **ne;
> +	struct aoetgt **t, **te;
> +	struct aoeif *ifp, *e;
> +	char *p;
> +
> +	memset(nds, 0, ARRAY_SIZE(nds));

bug: this will only zero eight bytes.

> +	nd = nds;
> +	ne = nd + ARRAY_SIZE(nds);
> +	t = d->targets;
> +	te = t + NTARGETS;
> +	for (; t<te && *t; t++) {
> +		ifp = (*t)->ifs;
> +		e = ifp + NAOEIFS;
> +		for (; ifp<e && ifp->nd; ifp++) {
> +			for (nnd=nds; nnd<nd; nnd++)
> +				if (*nnd == ifp->nd)
> +					break;
> +			if (nnd == nd)
> +			if (nd != ne)
> +				*nd++ = ifp->nd;
> +		}
> +	}

There are multiple little coding-style warts here.  scripts/checkpatch.pl
will point them out.

>  
> -	return snprintf(page, PAGE_SIZE, "%s\n", d->ifp->name);
> +	ne = nd;
> +	nd = nds;
> +	if (*nd == NULL)
> +		return snprintf(page, PAGE_SIZE, "none\n");
> +	for (p=page; nd<ne; nd++)
> +		p += snprintf(p, PAGE_SIZE - (p-page), "%s%s",
> +			p == page ? "" : ",", (*nd)->name);
> +	p += snprintf(p, PAGE_SIZE - (p-page), "\n");
> +	return p-page;
>  }
>  /* firmware version */
>
> ..
>
>  	spin_lock_irqsave(&d->lock, flags);
> -	d->flags &= ~DEVFL_MAXBCNT;
> -	d->flags |= DEVFL_PAUSE;
> +	aoecmd_cleanslate(d);
> +loop:
> +	skb = aoecmd_ata_id(d);
>  	spin_unlock_irqrestore(&d->lock, flags);
> +	if (!skb && !msleep_interruptible(200)) {
> +		spin_lock_irqsave(&d->lock, flags);
> +		goto loop;
> +	}
> +	aoenet_xmit(skb);
>  	aoecmd_cfg(major, minor);
> -
>  	return 0;
>  }

interruptible sleep?  Does this code work as intended when there's a signal
pending?  (Maybe that's what the interruptible sleep is for: don't know,
and am not inclined to work it out given the (low) level of comments in
here and the (lower) level of changelogging).

> ...
>
> +static struct frame *
> +freeframe(struct aoedev *d)
>  {
> +	struct frame *f, *e;
> +	struct aoetgt **t;
> +	ulong n;
> +
> +	if (d->targets[0] == NULL) {	/* shouldn't happen, but I'm paranoid */
> +		printk(KERN_ERR "aoe: NULL TARGETS!\n");
> +		return NULL;
> +	}
> +	t = d->targets;
> +	do {
> +		if (t != d->htgt)
> +		if ((*t)->ifp->nd)
> +		if ((*t)->nout < (*t)->maxout) {

ugh.  Do this:

	do {
		if (t == d->htgt)
			continue;
		if (!(*t)->ifp->nd)
			continue;
		if ((*t)->nout >= (*t)->maxout)
			continue;
			
		<stuff>
	} while (++t ...)


> +#define ATASCNT(raw) (((struct aoe_atahdr *)(((struct aoe_hdr *)raw)+1))->scnt)

This could be implemented as a (possibly inlined) C function, therefore it
shuld be implemented that way.

> +
>  static void
>  rexmit_timer(ulong vp)
>  {
>  	struct aoedev *d;
> +	struct aoetgt *t, **tt, **te;
> +	struct aoeif *ifp;
>  	struct frame *f, *e;
>  	struct sk_buff *sl;
>  	register long timeout;
> @@ -373,31 +451,75 @@ rexmit_timer(ulong vp)
>  		spin_unlock_irqrestore(&d->lock, flags);
>  		return;
>  	}
> -	f = d->frames;
> -	e = f + d->nframes;
> -	for (; f<e; f++) {
> -		if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
> +	tt = d->targets;
> +	te = tt + NTARGETS;
> +	for (; tt<te && *tt; tt++) {
> +		t = *tt;
> +		f = t->frames;
> +		e = f + t->nframes;
> +		for (; f<e; f++) {
> +			if (f->tag == FREETAG
> +			|| tsince(f->tag) < timeout)
> +				continue;
>  			n = f->waited += timeout;
>  			n /= HZ;
> -			if (n > aoe_deadsecs) { /* waited too long for response */
> +			if (n > aoe_deadsecs) { /* waited too long.  device failure. */
>  				aoedev_downdev(d);
>  				break;
>  			}
> -			rexmit(d, f);
> +
> +			if (n > HELPWAIT) /* see if another target can help */
> +			if (tt != d->targets || d->targets[1])

poease find a way to avoid pulling this stunt.

> +				d->htgt = tt;
> +
> +			if (t->nout == t->maxout) {
> +				if (t->maxout > 1)
> +					t->maxout--;
> +				t->lastwadj = jiffies;
> +			}
> +
> +			ifp = getif(t, f->skb->dev);
> +			if (ifp && ++ifp->lost > (t->nframes << 1))
> +			if (ifp != t->ifs || t->ifs[1].nd) {

here too.

> +				ejectif(t, ifp);
> +				ifp = NULL;
> +			}
> +
> +			if (ATASCNT(aoe_hdr(f->skb)) > DEFAULTBCNT / 512)
> +			if (ifp && ++ifp->lostjumbo > (t->nframes << 1))
> +			if (ifp->maxbcnt != DEFAULTBCNT) {

more.

> +				printk(KERN_INFO "aoe: e%ld.%d: too many lost jumbo on %s:%012llx - "
> +					"falling back to %d frames.\n",
> +					d->aoemajor, d->aoeminor,
> +					ifp->nd->name, mac_addr(t->addr),
> +					DEFAULTBCNT);
> +				ifp->maxbcnt = 0;
> +			}
> +			resend(d, t, f);
> +		}
> +
> +		/* window check */
> +		if (t->nout == t->maxout)
> +		if (t->maxout < t->nframes)
> +		if ((jiffies - t->lastwadj)/HZ > 10) {

more.  Just use &&?


> +			t->maxout++;
> +			t->lastwadj = jiffies;
>  		}
>  	}
> -	if (d->flags & DEVFL_KICKME) {
> +
> +	if (d->sendq_hd) {
> +		n = d->rttavg <<= 1;
> +		if (n > MAXTIMER)
> +			d->rttavg = MAXTIMER;
> +	}
> +
> +	if (d->flags & DEVFL_KICKME || d->htgt) {
>  		d->flags &= ~DEVFL_KICKME;
>  		aoecmd_work(d);
>  	}
>  
>  	sl = d->sendq_hd;
>  	d->sendq_hd = d->sendq_tl = NULL;
> -	if (sl) {
> -		n = d->rttavg <<= 1;
> -		if (n > MAXTIMER)
> -			d->rttavg = MAXTIMER;
> -	}
>  
>  	d->timer.expires = jiffies + TIMERTICK;
>  	add_timer(&d->timer);

> +static struct aoetgt *
> +addtgt(struct aoedev *d, char *addr, ulong nframes)
> +{
> +	struct aoetgt *t, **tt, **te;
> +	struct frame *f, *e;
> +
> +	tt = d->targets;
> +	te = tt + NTARGETS;
> +	for (; tt<te; tt++) {
> +		if (*tt == NULL)
> +			break;
> +		else if (memcmp((*tt)->addr, addr, 6) > 0) {
> +			memmove(tt+1, tt, (void *)te-(void *)(tt+1));
> +			break;
> +		}
> +	}

Wow.

Perhaps there are so few comments because nobody understands what it's all
doing

> +	if (tt == te)
> +		return NULL;
> +
> +	t = kcalloc(1, sizeof *t, GFP_ATOMIC);
> +	f = kcalloc(nframes, sizeof *f, GFP_ATOMIC);

Is the GFP_ATOMIC unavoidable?  It is vastly less reliable than GFP_KERNEL.

> +	switch (!t || !f) {

Oh dear.  Please, use an `if' statement rather than party tricks like this.

> +	case 0:
> +		t->nframes = nframes;
> +		t->frames = f;
> +		e = f + nframes;
> +		for (; f<e; f++) {
> +			f->tag = FREETAG;
> +			f->skb = new_skb(ETH_ZLEN);
> +			if (!f->skb)
> +				break;
> +		}
> +		if (f == e)
> +			break;
> +		while (f > t->frames) {
> +			f--;
> +			dev_kfree_skb(f->skb);
> +		}
> +	default:
> +		if (f)
> +			kfree(f);
> +		if (t)
> +			kfree(t);

kfree(NULL) is legal.

> +		return NULL;
> +	}
> +
> +	memcpy(t->addr, addr, sizeof t->addr);
> +	t->ifp = t->ifs;
> +	t->maxout = t->nframes;
> +	return *tt = t;
> +}
> +
>  void
>  aoecmd_cfg_rsp(struct sk_buff *skb)
>  {
>  	struct aoedev *d;
>  	struct aoe_hdr *h;
>  	struct aoe_cfghdr *ch;
> +	struct aoetgt *t;
> +	struct aoeif *ifp;
>  	ulong flags, sysminor, aoemajor;
>  	struct sk_buff *sl;
>  	enum { MAXFRAMES = 16 };
> @@ -754,7 +952,7 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
>  	if (n > MAXFRAMES)	/* keep it reasonable */
>  		n = MAXFRAMES;
>  
> -	d = aoedev_by_sysminor_m(sysminor, n);
> +	d = aoedev_by_sysminor_m(sysminor);
>  	if (d == NULL) {
>  		printk(KERN_INFO "aoe: device sysminor_m failure\n");
>  		return;
> @@ -762,38 +960,70 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
>  
>  	spin_lock_irqsave(&d->lock, flags);
>  
> -	/* permit device to migrate mac and network interface */
> -	d->ifp = skb->dev;
> -	memcpy(d->addr, h->src, sizeof d->addr);
> -	if (!(d->flags & DEVFL_MAXBCNT)) {
> -		n = d->ifp->mtu;
> +	t = gettgt(d, h->src);
> +	if (!t) {
> +		t = addtgt(d, h->src, n);
> +		if (!t) {
> +			printk(KERN_INFO "aoe: device addtgt failure; too many targets?\n");

No, more likely a GFP_ATOMIC allocation failed.  Returning -ENOMEM is
better than just guessing.


> +			spin_unlock_irqrestore(&d->lock, flags);
> +			return;
> +		}
> +	}
> +	ifp = getif(t, skb->dev);
> +	if (!ifp) {
> +		if (!(ifp = addif(t, skb->dev))) {

Preferred style is

		ifp = addif(t, skb->dev);
		if (!ifp) {

(checkpatch will report this)

> +			printk(KERN_INFO "aoe: device addif failure; too many interfaces?\n");
> +			spin_unlock_irqrestore(&d->lock, flags);
> +			return;
> +		}
> +	}
> +	if (ifp->maxbcnt) {
> +		n = ifp->nd->mtu;
>  		n -= sizeof (struct aoe_hdr) + sizeof (struct aoe_atahdr);
>  		n /= 512;
>  		if (n > ch->scnt)
>  			n = ch->scnt;
>  		n = n ? n * 512 : DEFAULTBCNT;
> -		if (n != d->maxbcnt) {
> +		if (n != ifp->maxbcnt) {
>  			printk(KERN_INFO
> -				"aoe: e%ld.%ld: setting %d byte data frames on %s\n",
> -				d->aoemajor, d->aoeminor, n, d->ifp->name);
> -			d->maxbcnt = n;
> +				"aoe: e%ld.%d: setting %d byte data frames on %s:%012llx\n",
> +				d->aoemajor, d->aoeminor, n, ifp->nd->name,
> +				(unsigned long long) mac_addr(t->addr));
> +			ifp->maxbcnt = n;
>  		}
>  	}
>  
>  	/* don't change users' perspective */
> -	if (d->nopen && !(d->flags & DEVFL_PAUSE)) {
> +	if (d->nopen) {
>  		spin_unlock_irqrestore(&d->lock, flags);
>  		return;
>  	}
> -	d->flags |= DEVFL_PAUSE;	/* force pause */
> -	d->mintimer = MINTIMER;
>  	d->fw_ver = be16_to_cpu(ch->fwver);
>  
> -	/* check for already outstanding ataid */
> -	sl = aoedev_isbusy(d) == 0 ? aoecmd_ata_id(d) : NULL;
> +	sl = aoecmd_ata_id(d);
>  
>  	spin_unlock_irqrestore(&d->lock, flags);
>  
>  	aoenet_xmit(sl);
>  }
>  


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 07/12] use a dynamic pool of sk_buffs to keep up with fast targets
  2007-06-26 18:50 ` [PATCH 07/12] use a dynamic pool of sk_buffs to keep up with fast targets Ed L. Cashin
@ 2007-07-03  4:36   ` Andrew Morton
  2007-07-03  4:40     ` David Miller
  0 siblings, 1 reply; 28+ messages in thread
From: Andrew Morton @ 2007-07-03  4:36 UTC (permalink / raw)
  To: Ed L. Cashin; +Cc: linux-kernel, Greg K-H, netdev

On Tue, 26 Jun 2007 14:50:11 -0400 "Ed L. Cashin" <ecashin@coraid.com> wrote:

> Use a dynamic pool of sk_buffs to keep up with fast targets.

That's far too skimpy a description of what this patch is doing, what it is
for, what makes AOE need this functionality, etc.

My initial thought is that if there is a legitimate need for this new capability
then it should be made available to other parts of the kernel rather than being
private to the AEO driver.

But 12 words is not enough information for us to make that judgement.

I have one lower-level comment way down below:

> Signed-off-by: Ed L. Cashin <ecashin@coraid.com>
> ---
>  drivers/block/aoe/aoe.h    |    5 ++
>  drivers/block/aoe/aoecmd.c |  129 +++++++++++++++++++++++++++++---------------
>  drivers/block/aoe/aoedev.c |   51 +++++++++++++++---
>  3 files changed, 134 insertions(+), 51 deletions(-)
> 
> diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
> index 7fa86dd..55c2f08 100644
> --- a/drivers/block/aoe/aoe.h
> +++ b/drivers/block/aoe/aoe.h
> @@ -98,6 +98,7 @@ enum {
>  	MIN_BUFS = 16,
>  	NTARGETS = 8,
>  	NAOEIFS = 8,
> +	NSKBPOOLMAX = 128,
>  
>  	TIMERTICK = HZ / 10,
>  	MINTIMER = HZ >> 2,
> @@ -147,6 +148,7 @@ struct aoetgt {
>  	u16 useme;
>  	ulong lastwadj;		/* last window adjustment */
>  int wpkts, rpkts;
> +int dataref;
>  };
>  
>  struct aoedev {
> @@ -168,6 +170,9 @@ struct aoedev {
>  	spinlock_t lock;
>  	struct sk_buff *sendq_hd; /* packets needing to be sent, list head */
>  	struct sk_buff *sendq_tl;
> +	struct sk_buff *skbpool_hd;
> +	struct sk_buff *skbpool_tl;
> +	int nskbpool;
>  	mempool_t *bufpool;	/* for deadlock-free Buf allocation */
>  	struct list_head bufq;	/* queue of bios to work on */
>  	struct buf *inprocess;	/* the one we're currently working on */
> diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
> index 62ba58c..89df9de 100644
> --- a/drivers/block/aoe/aoecmd.c
> +++ b/drivers/block/aoe/aoecmd.c
> @@ -105,43 +105,102 @@ ifrotate(struct aoetgt *t)
>  	}
>  }
>  
> +static void
> +skb_pool_put(struct aoedev *d, struct sk_buff *skb)
> +{
> +	if (!d->skbpool_hd)
> +		d->skbpool_hd = skb;
> +	else
> +		d->skbpool_tl->next = skb;
> +	d->skbpool_tl = skb;
> +}
> +
> +static struct sk_buff *
> +skb_pool_get(struct aoedev *d)
> +{
> +	struct sk_buff *skb;
> +
> +	skb = d->skbpool_hd;
> +	if (skb)
> +	if (atomic_read(&skb_shinfo(skb)->dataref) == 1) {
> +		d->skbpool_hd = skb->next;
> +		skb->next = NULL;
> +		return skb;
> +	}
> +	if (d->nskbpool < NSKBPOOLMAX)
> +	if ((skb = new_skb(ETH_ZLEN))) {
> +		d->nskbpool++;
> +		return skb;
> +	}
> +	return NULL;
> +}
> +
> +/* freeframe is where we do our load balancing so it's a little hairy. */
>  static struct frame *
>  freeframe(struct aoedev *d)
>  {
> -	struct frame *f, *e;
> +	struct frame *f, *e, *rf;
>  	struct aoetgt **t;
> -	ulong n;
> +	struct sk_buff *skb;
>  
>  	if (d->targets[0] == NULL) {	/* shouldn't happen, but I'm paranoid */
>  		printk(KERN_ERR "aoe: NULL TARGETS!\n");
>  		return NULL;
>  	}
> -	t = d->targets;
> -	do {
> +	t = d->tgt;
> +	t++;
> +	if (t >= &d->targets[NTARGETS] || !*t)
> +		t = d->targets;
> +	for (;;) {
> +		if ((*t)->nout < (*t)->maxout)
>  		if (t != d->htgt)
> -		if ((*t)->ifp->nd)
> -		if ((*t)->nout < (*t)->maxout) {
> -			n = (*t)->nframes;
> +		if ((*t)->ifp->nd) {
> +			rf = NULL;
>  			f = (*t)->frames;
> -			e = f + n;
> +			e = f + (*t)->nframes;
>  			for (; f<e; f++) {
>  				if (f->tag != FREETAG)
>  					continue;
> -				if (atomic_read(&skb_shinfo(f->skb)->dataref) != 1) {
> -					n--;
> +				skb = f->skb;
> +				if (!skb)
> +				if (!(f->skb = skb = new_skb(ETH_ZLEN)))
> +					continue;
> +				if (atomic_read(&skb_shinfo(skb)->dataref) != 1) {
> +					if (!rf)
> +						rf = f;
>  					continue;
>  				}
> -				skb_shinfo(f->skb)->nr_frags = f->skb->data_len = 0;
> -				skb_trim(f->skb, 0);
> +gotone:				skb_shinfo(skb)->nr_frags = skb->data_len = 0;
> +				skb_trim(skb, 0);
>  				d->tgt = t;
>  				ifrotate(*t);
>  				return f;
>  			}
> -			if (n == 0)	/* slow polling network card */
> +			/* Work can be done, but the network layer is
> +			   holding our precious packets.  Try to grab
> +			   one from the pool. */
> +			f = rf;
> +			if (f == NULL) {	/* more paranoia */
> +				printk(KERN_ERR "aoe: freeframe: unexpected null rf.\n");
> +				d->flags |= DEVFL_KICKME;
> +				return NULL;
> +			}
> +			skb = skb_pool_get(d);
> +			if (skb) {
> +				skb_pool_put(d, f->skb);
> +				f->skb = skb;
> +				goto gotone;
> +			}
> +			(*t)->dataref++;
> +			if ((*t)->nout == 0)
>  				d->flags |= DEVFL_KICKME;
>  		}
> +		if (t == d->tgt)	/* we've looped and found nada */
> +			break;
>  		t++;
> -	} while (t < &d->targets[NTARGETS] && *t);
> +		if (t >= &d->targets[NTARGETS] || !*t)
> +			t = d->targets;
> +	}
>  	return NULL;
>  }
>  
> @@ -870,44 +929,26 @@ addtgt(struct aoedev *d, char *addr, ulong nframes)
>  
>  	tt = d->targets;
>  	te = tt + NTARGETS;
> -	for (; tt<te; tt++) {
> -		if (*tt == NULL)
> -			break;
> -		else if (memcmp((*tt)->addr, addr, 6) > 0) {
> -			memmove(tt+1, tt, (void *)te-(void *)(tt+1));
> -			break;
> -		}
> -	}
> +	for (; tt<te && *tt; tt++)
> +		;
> +
>  	if (tt == te)
>  		return NULL;
>  
>  	t = kcalloc(1, sizeof *t, GFP_ATOMIC);
> +	if (!t)
> +		return NULL;
>  	f = kcalloc(nframes, sizeof *f, GFP_ATOMIC);
> -	switch (!t || !f) {
> -	case 0:
> -		t->nframes = nframes;
> -		t->frames = f;
> -		e = f + nframes;
> -		for (; f<e; f++) {
> -			f->tag = FREETAG;
> -			f->skb = new_skb(ETH_ZLEN);
> -			if (!f->skb)
> -				break;
> -		}
> -		if (f == e)
> -			break;
> -		while (f > t->frames) {
> -			f--;
> -			dev_kfree_skb(f->skb);
> -		}
> -	default:
> -		if (f)
> -			kfree(f);
> -		if (t)
> -			kfree(t);
> +	if (!f) {
> +		kfree(t);
>  		return NULL;
>  	}
>  
> +	t->nframes = nframes;
> +	t->frames = f;
> +	e = f + nframes;
> +	for (; f<e; f++)
> +		f->tag = FREETAG;
>  	memcpy(t->addr, addr, sizeof t->addr);
>  	t->ifp = t->ifs;
>  	t->maxout = t->nframes;
> diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
> index 8659796..cf4e58d 100644
> --- a/drivers/block/aoe/aoedev.c
> +++ b/drivers/block/aoe/aoedev.c
> @@ -7,11 +7,13 @@
>  #include <linux/hdreg.h>
>  #include <linux/blkdev.h>
>  #include <linux/netdevice.h>
> +#include <linux/delay.h>
>  #include "aoe.h"
>  
>  static void dummy_timer(ulong);
>  static void aoedev_freedev(struct aoedev *);
> -static void freetgt(struct aoetgt *t);
> +static void freetgt(struct aoedev *d, struct aoetgt *t);
> +static void skbpoolfree(struct aoedev *d);
>  
>  static struct aoedev *devlist;
>  static spinlock_t devlist_lock;
> @@ -125,9 +127,10 @@ aoedev_freedev(struct aoedev *d)
>  	t = d->targets;
>  	e = t + NTARGETS;
>  	for (; t<e && *t; t++)
> -		freetgt(*t);
> +		freetgt(d, *t);
>  	if (d->bufpool)
>  		mempool_destroy(d->bufpool);
> +	skbpoolfree(d);
>  	kfree(d);
>  }
>  
> @@ -176,6 +179,42 @@ aoedev_flush(const char __user *str, size_t cnt)
>  	return 0;
>  }
>  
> +/* I'm not really sure that this is a realistic problem, but if the
> +network driver goes gonzo let's just leak memory after complaining. */
> +static void
> +skbfree(struct sk_buff *skb)
> +{
> +	enum { Sms= 100, Tms= 3*1000};
> +	int i = Tms / Sms;
> +
> +	if (skb == NULL)
> +		return;
> +	while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
> +		msleep_interruptible(Sms);

If the calling process has signal_pending(), this will turn into a busy
loop.  On non-preempt uniproc it'll lock the box.

The only way this code is safe is if it's called from a kernel thread.  Is
it?

> +	if (i <= 0) {
> +		printk(KERN_ERR
> +			"aoe: %s holds ref: cannot free skb -- memory leaked.\n",
> +			skb->dev ? skb->dev->name : "netif");
> +		return;
> +	}
> +	skb_shinfo(skb)->nr_frags = skb->data_len = 0;
> +	skb_trim(skb, 0);
> +	dev_kfree_skb(skb);
> +}
> +
> +static void
> +skbpoolfree(struct aoedev *d)
> +{
> +	struct sk_buff *skb;
> +
> +	while ((skb = d->skbpool_hd)) {
> +		d->skbpool_hd = skb->next;
> +		skb->next = NULL;
> +		skbfree(skb);
> +	}
> +	d->skbpool_tl = NULL;
> +}
> +
>  /* find it or malloc it */
>  struct aoedev *
>  aoedev_by_sysminor_m(ulong sysminor)
> @@ -214,16 +253,14 @@ aoedev_by_sysminor_m(ulong sysminor)
>  }
>  
>  static void
> -freetgt(struct aoetgt *t)
> +freetgt(struct aoedev *d, struct aoetgt *t)
>  {
>  	struct frame *f, *e;
>  
>  	f = t->frames;
>  	e = f + t->nframes;
> -	for (; f<e; f++) {
> -		skb_shinfo(f->skb)->nr_frags = 0;
> -		dev_kfree_skb(f->skb);
> -	}
> +	for (; f<e; f++)
> +		skbfree(f->skb);
>  	kfree(t->frames);
>  	kfree(t);
>  }
> -- 
> 1.5.2.1
> 
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 08/12] only schedule work once
  2007-06-26 18:50 ` [PATCH 08/12] only schedule work once Ed L. Cashin
@ 2007-07-03  4:37   ` Andrew Morton
  0 siblings, 0 replies; 28+ messages in thread
From: Andrew Morton @ 2007-07-03  4:37 UTC (permalink / raw)
  To: Ed L. Cashin; +Cc: linux-kernel, Greg K-H

On Tue, 26 Jun 2007 14:50:12 -0400 "Ed L. Cashin" <ecashin@coraid.com> wrote:

> Only schedule work once.

why?

> Signed-off-by: Ed L. Cashin <ecashin@coraid.com>
> ---
>  drivers/block/aoe/aoecmd.c |    2 ++
>  1 files changed, 2 insertions(+), 0 deletions(-)
> 
> diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
> index 89df9de..8d6540b 100644
> --- a/drivers/block/aoe/aoecmd.c
> +++ b/drivers/block/aoe/aoecmd.c
> @@ -681,6 +681,8 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
>  			d->fw_ver, (long long)ssize);
>  	d->ssize = ssize;
>  	d->geo.start = 0;
> +	if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
> +		return;
>  	if (d->gd != NULL) {
>  		d->gd->capacity = ssize;
>  		d->flags |= DEVFL_NEWSIZE;

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 09/12] remove race between use and initialization of locks
  2007-06-26 18:50 ` [PATCH 09/12] remove race between use and initialization of locks Ed L. Cashin
@ 2007-07-03  4:38   ` Andrew Morton
  0 siblings, 0 replies; 28+ messages in thread
From: Andrew Morton @ 2007-07-03  4:38 UTC (permalink / raw)
  To: Ed L. Cashin; +Cc: linux-kernel, Greg K-H

On Tue, 26 Jun 2007 14:50:12 -0400 "Ed L. Cashin" <ecashin@coraid.com> wrote:

> This change was originally submitted by Alexey Dobriyan in an email
> with ...
> 
>   Message-ID: <20070325190221.GA5308@martell.zuzino.mipt.ru>
> 
> and the comment,
> 
>   Some drivers do register_chrdev() before lock or semaphore used in
>   corresponding file_operations is initialized.
> 
> Andrew Morton commented that these locks should be initialized at
> compile time, but Alexey Debriyan pointed out that the Documentation
> tells us to use dynamic initialization whenever possible, and then the
> discussion petered out.
> 
>   http://preview.tinyurl.com/2pxq6p
> 
> I believe we made these locks dynamic because of the notice in
> Documentation/spinlocks.txt, which says that static initializers are
> deprecated:
> 
>   UPDATE March 21 2005 Amit Gud <gud@eth.net>
> 
>   Macros SPIN_LOCK_UNLOCKED and RW_LOCK_UNLOCKED are deprecated and will be
>   removed soon. So for any new code dynamic initialization should be used:

The document is inaccurate.

Yes, the use of SPIN_LOCK_UNLOCKED should be avoided because it breaks
lockdep.  But it can be replaced with DEFINE_SPINLOCK: there is no need to
switch to runtime initialisation.



> ...
> 
> In any case, the patch below makes the code correct and in keeping
> with the existing documentation.  If the existing docs are wrong, I'd
> be happy to follow up with a patch that corrects them and makes these
> aoechr.c locks static.
> 
> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
> Signed-off-by: Ed L. Cashin <ecashin@coraid.com>
> ---
>  drivers/block/aoe/aoechr.c |    4 ++--
>  1 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
> index 10b38a7..2b4f873 100644
> --- a/drivers/block/aoe/aoechr.c
> +++ b/drivers/block/aoe/aoechr.c
> @@ -256,13 +256,13 @@ aoechr_init(void)
>  {
>  	int n, i;
>  
> +	sema_init(&emsgs_sema, 0);
> +	spin_lock_init(&emsgs_lock);
>  	n = register_chrdev(AOE_MAJOR, "aoechr", &aoe_fops);
>  	if (n < 0) { 
>  		printk(KERN_ERR "aoe: can't register char device\n");
>  		return n;
>  	}
> -	sema_init(&emsgs_sema, 0);
> -	spin_lock_init(&emsgs_lock);
>  	aoe_class = class_create(THIS_MODULE, "aoe");
>  	if (IS_ERR(aoe_class)) {
>  		unregister_chrdev(AOE_MAJOR, "aoechr");
> -- 
> 1.5.2.1
> 
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 07/12] use a dynamic pool of sk_buffs to keep up with fast targets
  2007-07-03  4:36   ` Andrew Morton
@ 2007-07-03  4:40     ` David Miller
  2007-07-03 18:45       ` Matt Mackall
  2007-07-06 17:09       ` Ed L. Cashin
  0 siblings, 2 replies; 28+ messages in thread
From: David Miller @ 2007-07-03  4:40 UTC (permalink / raw)
  To: akpm; +Cc: ecashin, linux-kernel, greg, netdev

From: Andrew Morton <akpm@linux-foundation.org>
Date: Mon, 2 Jul 2007 21:36:36 -0700

> My initial thought is that if there is a legitimate need for this
> new capability then it should be made available to other parts of
> the kernel rather than being private to the AEO driver.

Absolutely.

We even used to have something like this on a per-cpu basis but using
generic SLAB is so much better for caching and NUMA that we got rid of
that.

Every sk_buff private "quicklist" pool implementation you
see should essentially be NAK'd from the get go, it's
meaningless and if it's really needed one should investigate
why SKB allocations become such a problem instead of papering
over the issue. :-)

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 10/12] add module parameter for users who need more outstanding I/O
  2007-06-26 18:50 ` [PATCH 10/12] add module parameter for users who need more outstanding I/O Ed L. Cashin
@ 2007-07-03  4:41   ` Andrew Morton
  0 siblings, 0 replies; 28+ messages in thread
From: Andrew Morton @ 2007-07-03  4:41 UTC (permalink / raw)
  To: Ed L. Cashin; +Cc: linux-kernel, Greg K-H

On Tue, 26 Jun 2007 14:50:12 -0400 "Ed L. Cashin" <ecashin@coraid.com> wrote:

> Add module parameter for users who need more outstanding I/O.
> 

why?

I assume that there is some performance benefit to permitting some more
outstanding IO, but that's just a wild guess.

Assuming that the guess is right, some explanation of the effects of
altering this tunable would be appropriate.


> ---
>  drivers/block/aoe/aoecmd.c |   10 +++++++---
>  1 files changed, 7 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
> index 8d6540b..9de0024 100644
> --- a/drivers/block/aoe/aoecmd.c
> +++ b/drivers/block/aoe/aoecmd.c
> @@ -17,6 +17,11 @@ static int aoe_deadsecs = 60 * 3;
>  module_param(aoe_deadsecs, int, 0644);
>  MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");
>  
> +static int aoe_maxout = 16;
> +module_param(aoe_maxout, int, 0644);
> +MODULE_PARM_DESC(aoe_maxout,
> +	"Only aoe_maxout outstanding packets for every MAC on eX.Y.");
> +
>  static struct sk_buff *
>  new_skb(ulong len)
>  {
> @@ -967,7 +972,6 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
>  	struct aoeif *ifp;
>  	ulong flags, sysminor, aoemajor;
>  	struct sk_buff *sl;
> -	enum { MAXFRAMES = 16 };
>  	u16 n;
>  
>  	h = aoe_hdr(skb);
> @@ -992,8 +996,8 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
>  	}
>  
>  	n = be16_to_cpu(ch->bufcnt);
> -	if (n > MAXFRAMES)	/* keep it reasonable */
> -		n = MAXFRAMES;
> +	if (n > aoe_maxout)	/* keep it reasonable */
> +		n = aoe_maxout;
>  
>  	d = aoedev_by_sysminor_m(sysminor);
>  	if (d == NULL) {
> -- 
> 1.5.2.1
> 
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 07/12] use a dynamic pool of sk_buffs to keep up with fast targets
  2007-07-03  4:40     ` David Miller
@ 2007-07-03 18:45       ` Matt Mackall
  2007-07-03 19:18         ` Stephen Hemminger
  2007-07-06 17:09       ` Ed L. Cashin
  1 sibling, 1 reply; 28+ messages in thread
From: Matt Mackall @ 2007-07-03 18:45 UTC (permalink / raw)
  To: David Miller; +Cc: akpm, ecashin, linux-kernel, greg, netdev

On Mon, Jul 02, 2007 at 09:40:36PM -0700, David Miller wrote:
> From: Andrew Morton <akpm@linux-foundation.org>
> Date: Mon, 2 Jul 2007 21:36:36 -0700
> 
> > My initial thought is that if there is a legitimate need for this
> > new capability then it should be made available to other parts of
> > the kernel rather than being private to the AEO driver.
> 
> Absolutely.
> 
> We even used to have something like this on a per-cpu basis but using
> generic SLAB is so much better for caching and NUMA that we got rid of
> that.
> 
> Every sk_buff private "quicklist" pool implementation you
> see should essentially be NAK'd from the get go, it's
> meaningless and if it's really needed one should investigate
> why SKB allocations become such a problem instead of papering
> over the issue. :-)

This is in the VM write-back path. SLAB is insufficient to avoid
deadlock.

-- 
Mathematics is the supreme nostalgia of our time.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 07/12] use a dynamic pool of sk_buffs to keep up with fast targets
  2007-07-03 18:45       ` Matt Mackall
@ 2007-07-03 19:18         ` Stephen Hemminger
  0 siblings, 0 replies; 28+ messages in thread
From: Stephen Hemminger @ 2007-07-03 19:18 UTC (permalink / raw)
  To: linux-kernel; +Cc: netdev

On Tue, 3 Jul 2007 13:45:33 -0500
Matt Mackall <mpm@selenic.com> wrote:

> On Mon, Jul 02, 2007 at 09:40:36PM -0700, David Miller wrote:
> > From: Andrew Morton <akpm@linux-foundation.org>
> > Date: Mon, 2 Jul 2007 21:36:36 -0700
> > 
> > > My initial thought is that if there is a legitimate need for this
> > > new capability then it should be made available to other parts of
> > > the kernel rather than being private to the AEO driver.
> > 
> > Absolutely.
> > 
> > We even used to have something like this on a per-cpu basis but using
> > generic SLAB is so much better for caching and NUMA that we got rid of
> > that.
> > 
> > Every sk_buff private "quicklist" pool implementation you
> > see should essentially be NAK'd from the get go, it's
> > meaningless and if it's really needed one should investigate
> > why SKB allocations become such a problem instead of papering
> > over the issue. :-)
> 
> This is in the VM write-back path. SLAB is insufficient to avoid
> deadlock.
> 
Then where is the code to drain your pool back to the normal pool.
The problem with private pools is that they work great for benchmarks
and if that is the only code in the system. But they suck if other
code needs to run. How do you keep your private stash from running
the network device out of memory to receive packets?

-- 
Stephen Hemminger <shemminger@linux-foundation.org>


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 07/12] use a dynamic pool of sk_buffs to keep up with fast targets
  2007-07-03  4:40     ` David Miller
  2007-07-03 18:45       ` Matt Mackall
@ 2007-07-06 17:09       ` Ed L. Cashin
  1 sibling, 0 replies; 28+ messages in thread
From: Ed L. Cashin @ 2007-07-06 17:09 UTC (permalink / raw)
  To: akpm; +Cc: David Miller, linux-kernel, greg, netdev

Hi.  I will address the style issues and other things that Andrew
Morton pointed out---Thanks again for the feedback.

As far as the skb pool goes, I'm afraid my comment is misleading.

What this Patch Does 

  Even before this recent series of 12 patches to 2.6.22-rc4, the aoe
  driver was reusing a small set of skbs that were allocated once and
  were only used for outbound AoE commands.
  
  The network layer cannot be allowed to put_page on the data that is
  still associated with a bio we haven't returned to the block layer,
  so the aoe driver (even before the patch under discussion) is still
  the owner of skbs that have been handed to the network layer for
  transmission.  We need to keep track of these skbs so that we can
  free them, but by tracking them, we can also easily re-use them.
  
  The new patch was a response to the behavior of certain network
  drivers.  We cannot reuse an skb that the network driver still has
  in its transmit ring.  Network drivers can defer transmit ring
  cleanup and then use the state in the skb to determine how many data
  segments to clean up in its transmit ring.  The tg3 driver is one
  driver that behaves in this way.
  
  When the network driver defers cleanup of its transmit ring, the aoe
  driver can find itself in a situation where it would like to send an
  AoE command, and the AoE target is ready for more work, but the
  network driver still has all of the pre-allocated skbs.  In that
  case, the new patch just calls alloc_skb, as you'd expect.
  
  We don't want to get carried away, though.  We try not to do
  excessive allocation in the write path, so we cap the number of skbs
  we dynamically allocate.
  
  Probably calling it a "dynamic pool" is misleading.  We were already
  trying to use a small fixed-size set of pre-allocated skbs before
  this patch, and this patch just provides a little headroom (with a
  ceiling, though) to accomodate network drivers that hang onto skbs,
  by allocating when needed.  The d->skbpool_hd list of allocated skbs
  is necessary so that we can free them later.
  
  We didn't notice the need for this headroom until AoE targets got
  fast enough, but the comment summarizing this patch still wasn't
  very good.  So, when I resubmit this patch, I will use a different
  comment:
  
    dynamically allocate a capped number of skbs when necessary


Alternatives

  If the network layer never did a put_page on the pages in the bio's
  we get from the block layer, then it would be possible for us to
  hand skbs to the network layer and forget about them, allowing the
  network layer to free skbs itself (and thereby calling our own
  skb->destructor callback function if we needed that).  In that case
  we could get rid of the pre-allocated skbs and also the
  d->skbpool_hd, instead just calling alloc_skb every time we wanted
  to transmit a packet.  The slab allocator would effectively maintain
  the list of skbs.
  
  Besides a loss of CPU cache locality, the main concern with that
  approach the danger that it would increase the likelihood of
  deadlock when VM is trying to free pages by writing dirty data from
  the page cache through the aoe driver out to persistent storage on
  an AoE device.  Right now we have a situation where we have
  pre-allocation that corresponds to how much we use, which seems
  ideal.
  
  Of course, there's still the separate issue of receiving the packets
  that tell us that a write has successfully completed on the AoE
  target.  When memory is low and VM is using AoE to flush dirty data
  to free up pages, it would be perfect if there were a way for us to
  register a fast callback that could recognize write command
  completion responses.  But I don't think the current problems with
  the receive side of the situation are a justification for
  exacerbating the problem on the transmit side.

-- 
  Support - http://www.coraid.com/support/howto.html
  Ed L Cashin <ecashin@coraid.com>

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 02/12] handle multiple network paths to AoE device
  2007-07-03  4:29   ` Andrew Morton
@ 2007-07-11 14:46     ` Ed L. Cashin
  2007-07-16 22:17     ` [PATCH] stacked ifs (was Re: [PATCH 02/12] handle multiple network paths to AoE device) Ed L. Cashin
  1 sibling, 0 replies; 28+ messages in thread
From: Ed L. Cashin @ 2007-07-11 14:46 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, Greg K-H

On Mon, Jul 02, 2007 at 09:29:49PM -0700, Andrew Morton wrote:
> On Tue, 26 Jun 2007 14:50:10 -0400 "Ed L. Cashin" <ecashin@coraid.com> wrote:
...
> > +loop:
> > +	skb = aoecmd_ata_id(d);
> >  	spin_unlock_irqrestore(&d->lock, flags);
> > +	if (!skb && !msleep_interruptible(200)) {
> > +		spin_lock_irqsave(&d->lock, flags);
> > +		goto loop;
> > +	}
> > +	aoenet_xmit(skb);
> >  	aoecmd_cfg(major, minor);
> > -
> >  	return 0;
> >  }
> 
> interruptible sleep?  Does this code work as intended when there's a signal
> pending?  (Maybe that's what the interruptible sleep is for: don't know,
> and am not inclined to work it out given the (low) level of comments in
> here and the (lower) level of changelogging).

Yes, if a signal is pending, then msleep_interruptible will not return
0.  That means we will not loop but will call aoenet_xmit with a NULL
skb, which is a noop.  So if the system is too low on memory or the
aoe driver is too low on frames, then the user can hit control-C to
interrupt the attempt to do a revalidate.

I will add a comment to that effect in the resubmitted patch.

-- 
  Ed L Cashin <ecashin@coraid.com>

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH] stacked ifs (was Re: [PATCH 02/12] handle multiple network paths to AoE device)
  2007-07-03  4:29   ` Andrew Morton
  2007-07-11 14:46     ` Ed L. Cashin
@ 2007-07-16 22:17     ` Ed L. Cashin
  2007-07-16 22:31       ` Andrew Morton
  1 sibling, 1 reply; 28+ messages in thread
From: Ed L. Cashin @ 2007-07-16 22:17 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, Greg K-H

On Mon, Jul 02, 2007 at 09:29:49PM -0700, Andrew Morton wrote:
> On Tue, 26 Jun 2007 14:50:10 -0400 "Ed L. Cashin" <ecashin@coraid.com> wrote:
...
> > +static struct frame *
> > +freeframe(struct aoedev *d)
> >  {
> > +	struct frame *f, *e;
> > +	struct aoetgt **t;
> > +	ulong n;
> > +
> > +	if (d->targets[0] == NULL) {	/* shouldn't happen, but I'm paranoid */
> > +		printk(KERN_ERR "aoe: NULL TARGETS!\n");
> > +		return NULL;
> > +	}
> > +	t = d->targets;
> > +	do {
> > +		if (t != d->htgt)
> > +		if ((*t)->ifp->nd)
> > +		if ((*t)->nout < (*t)->maxout) {
> 
> ugh.  Do this:
> 
> 	do {
> 		if (t == d->htgt)
> 			continue;
> 		if (!(*t)->ifp->nd)
> 			continue;
> 		if ((*t)->nout >= (*t)->maxout)
> 			continue;
> 			
> 		<stuff>
> 	} while (++t ...)

Do you think the "stacked ifs" in the first version above could be
accepted as a convenient extension to the K&R-based conventions in
Documentation/CodingStyle?  Brian Kerhnighan (the "K" in "K&R") and
Ken Thompson, were among the UNIX hackers who produced the UNIX v7
files that have stacked ifs:

  namei.c, dump.c, iostat.c od.c sa.c, vplot.c, refer/what2.c,
  sed/sed1.c, tbl/t8.c, chess/{agen, play, stdin}.c

Certainly, Linux isn't UNIX, but stacked ifs needn't be treated as
foreign.  They're in the K&R tradition that Documentation/CodingStyle
is based on.

Stacked ifs are functionally equivalent to a single if with its
conditionals joined by "&&".  Once that is retained, they are not at
all difficult to recognize and understand.  And they have some
advantages over the single if that uses "&&".

When editing code, it is easier to remove conditionals that are no
longer needed, or to arrange conditionals in a different order.
Conditional expressions stand out clearly.

When stacked ifs are in use, the resulting patches can be easier to
read because fewer lines need to change (compared to splitting on the
&&), and also simply because the text is more regular when it comes to
parentheses and ampersands.  There is less distracting noise.

Of course, my primary motivation is for us to be able to contribute
aoe driver patches that use this style, and that would be fantastic,
but I don't think it is unrealistic to say that the adoption of this
style would benefit others, helping to make patches easier to review
in some cases.

Understanding it only takes an understanding of C itself, so the only
"new" change would be a slight and justifiable loosening of the
indentation policy.

Signed-off-by: "Ed L. Cashin" <ecashin@coraid.com>

---
 Documentation/CodingStyle |   21 +++++++++++++++++++++
 1 files changed, 21 insertions(+), 0 deletions(-)

diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle
index a667eb1..bb2bb57 100644
--- a/Documentation/CodingStyle
+++ b/Documentation/CodingStyle
@@ -94,6 +94,27 @@ void fun(int a, int b, int c)
 		next_statement;
 }
 
+One way to break a long condition in an if statement is to use stacked
+ifs.  The following code extracts are equivalent, but the version with
+stacked ifs is easier to read and edit, and it generates more specific
+patches.
+
+	/* version one: stacked ifs */
+	if (condition)
+	if (condition2)
+	if (condition3)
+	if (condition4)
+		first_statement;
+	else
+		next_statement;
+
+	/* version two: logical and */
+	if (condition1 && condition2 && condition3 && condition4)
+		first_statement;
+	else
+		next_statement;
+
+
 		Chapter 3: Placing Braces and Spaces
 
 The other issue that always comes up in C styling is the placement of
-- 
1.5.2.1


-- 
  Ed L Cashin <ecashin@coraid.com>

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] stacked ifs (was Re: [PATCH 02/12] handle multiple network paths to AoE device)
  2007-07-16 22:17     ` [PATCH] stacked ifs (was Re: [PATCH 02/12] handle multiple network paths to AoE device) Ed L. Cashin
@ 2007-07-16 22:31       ` Andrew Morton
  2007-07-17  0:01         ` Greg KH
  0 siblings, 1 reply; 28+ messages in thread
From: Andrew Morton @ 2007-07-16 22:31 UTC (permalink / raw)
  To: Ed L. Cashin; +Cc: linux-kernel, Greg K-H

On Mon, 16 Jul 2007 18:17:44 -0400
"Ed L. Cashin" <ecashin@coraid.com> wrote:

> > ugh.  Do this:
> > 
> > 	do {
> > 		if (t == d->htgt)
> > 			continue;
> > 		if (!(*t)->ifp->nd)
> > 			continue;
> > 		if ((*t)->nout >= (*t)->maxout)
> > 			continue;
> > 			
> > 		<stuff>
> > 	} while (++t ...)
> 
> Do you think the "stacked ifs" in the first version above could be
> accepted as a convenient extension to the K&R-based conventions in
> Documentation/CodingStyle?

Maybe.  I don't recall seeing any kernel code which uses that convention:
everyone uses &&.  So personally I'd prefer to see kernel code stick to the
one convention, given that there is not, afacit, any significant advantage
to the alternative one.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] stacked ifs (was Re: [PATCH 02/12] handle multiple network paths to AoE device)
  2007-07-16 22:31       ` Andrew Morton
@ 2007-07-17  0:01         ` Greg KH
  2007-07-18 15:24           ` Jan Engelhardt
  0 siblings, 1 reply; 28+ messages in thread
From: Greg KH @ 2007-07-17  0:01 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Ed L. Cashin, linux-kernel

On Mon, Jul 16, 2007 at 03:31:55PM -0700, Andrew Morton wrote:
> On Mon, 16 Jul 2007 18:17:44 -0400
> "Ed L. Cashin" <ecashin@coraid.com> wrote:
> 
> > > ugh.  Do this:
> > > 
> > > 	do {
> > > 		if (t == d->htgt)
> > > 			continue;
> > > 		if (!(*t)->ifp->nd)
> > > 			continue;
> > > 		if ((*t)->nout >= (*t)->maxout)
> > > 			continue;
> > > 			
> > > 		<stuff>
> > > 	} while (++t ...)
> > 
> > Do you think the "stacked ifs" in the first version above could be
> > accepted as a convenient extension to the K&R-based conventions in
> > Documentation/CodingStyle?
> 
> Maybe.  I don't recall seeing any kernel code which uses that convention:
> everyone uses &&.  So personally I'd prefer to see kernel code stick to the
> one convention, given that there is not, afacit, any significant advantage
> to the alternative one.

I agree, let's stick with the convention we already have and use
instead.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] stacked ifs (was Re: [PATCH 02/12] handle multiple network paths to AoE device)
  2007-07-17  0:01         ` Greg KH
@ 2007-07-18 15:24           ` Jan Engelhardt
  0 siblings, 0 replies; 28+ messages in thread
From: Jan Engelhardt @ 2007-07-18 15:24 UTC (permalink / raw)
  To: Greg KH; +Cc: Andrew Morton, Ed L. Cashin, linux-kernel


On Jul 16 2007 17:01, Greg KH wrote:
>> 
>> > > ugh.  Do this:
>> > > 
>> > > 	do {
>> > > 		if (t == d->htgt)
>> > > 			continue;
>> > > 		if (!(*t)->ifp->nd)
>> > > 			continue;
>> > > 		if ((*t)->nout >= (*t)->maxout)
>> > > 			continue;
>> > > 			
>> > > 		<stuff>
>> > > 	} while (++t ...)
>> > 
>> > Do you think the "stacked ifs" in the first version above could be
>> > accepted as a convenient extension to the K&R-based conventions in
>> > Documentation/CodingStyle?
>> 
>> Maybe.  I don't recall seeing any kernel code which uses that convention:
>> everyone uses &&.  So personally I'd prefer to see kernel code stick to the
>> one convention, given that there is not, afacit, any significant advantage
>> to the alternative one.
>
>I agree, let's stick with the convention we already have and use
>instead.

Yup. Either the "do this" (see above) or the "&&" variant, though, the latter
can become quite nested or long.

[ In fact, if you have
  void function(struct something *arg)
  {
  	if (arg != NULL) {
  		lots_of_code;
  	}
  }
  it is perhaps better to write as
  {
  	if (arg == NULL)
  		return;
  	lots_of_code;
  }
  since that reduces the indent by at least one. ]


	Jan
-- 

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2007-07-18 15:24 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-06-26 18:50 [PATCH 01/12] bring driver version number to 47 Ed L. Cashin
2007-06-26 18:50 ` [PATCH 03/12] mac_addr: avoid 64-bit arch compiler warnings Ed L. Cashin
2007-06-26 18:50 ` [PATCH 02/12] handle multiple network paths to AoE device Ed L. Cashin
2007-07-03  4:29   ` Andrew Morton
2007-07-11 14:46     ` Ed L. Cashin
2007-07-16 22:17     ` [PATCH] stacked ifs (was Re: [PATCH 02/12] handle multiple network paths to AoE device) Ed L. Cashin
2007-07-16 22:31       ` Andrew Morton
2007-07-17  0:01         ` Greg KH
2007-07-18 15:24           ` Jan Engelhardt
2007-06-26 18:50 ` [PATCH 05/12] eliminate goto and improve readability Ed L. Cashin
2007-06-26 18:50 ` [PATCH 07/12] use a dynamic pool of sk_buffs to keep up with fast targets Ed L. Cashin
2007-07-03  4:36   ` Andrew Morton
2007-07-03  4:40     ` David Miller
2007-07-03 18:45       ` Matt Mackall
2007-07-03 19:18         ` Stephen Hemminger
2007-07-06 17:09       ` Ed L. Cashin
2007-06-26 18:50 ` [PATCH 06/12] user can ask driver to forget previously detected devices Ed L. Cashin
2007-06-26 18:50 ` [PATCH 04/12] clean up udev configuration example Ed L. Cashin
2007-06-26 18:50 ` [PATCH 12/12] the aoeminor doesn't need a long format Ed L. Cashin
2007-06-26 19:51   ` Randy Dunlap
2007-06-26 19:59     ` Ed L. Cashin
2007-06-26 18:50 ` [PATCH 10/12] add module parameter for users who need more outstanding I/O Ed L. Cashin
2007-07-03  4:41   ` Andrew Morton
2007-06-26 18:50 ` [PATCH 11/12] remove extra space in prototypes for consistency Ed L. Cashin
2007-06-26 18:50 ` [PATCH 09/12] remove race between use and initialization of locks Ed L. Cashin
2007-07-03  4:38   ` Andrew Morton
2007-06-26 18:50 ` [PATCH 08/12] only schedule work once Ed L. Cashin
2007-07-03  4:37   ` Andrew Morton

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).