Change the target->end_io function to take the rw param.

Use target->end_io rather than hooking in dm-iostats.c

Instead of taking the average of the individual io latencies calculate
the 'average end_io time' - 'average start time'.  This removes the need
for passing context (ie. start time) between the map function and the
end_io function.
--- diff/drivers/md/dm-iostats.c	2002-12-16 15:22:03.000000000 +0000
+++ source/drivers/md/dm-iostats.c	2002-12-16 15:32:46.000000000 +0000
@@ -12,65 +12,33 @@
 #include <linux/mempool.h>
 #include <linux/slab.h>
 #include <linux/ctype.h>
+#include <linux/time.h>
 
-static kmem_cache_t *_iostats_cache;
-mempool_t *_iostats_pool;
 
 typedef unsigned long jiffies_t;
-
-struct dm_iostats {
-	struct dm_target *ti;
-
-	int rw;
-	jiffies_t start_io;
-
-	void (*end_io) (struct buffer_head * bh, int uptodate);
-	void *context;
-};
-
-/*
- * io status for a linear range of a device.
- */
-typedef	uint16_t	count_t;
-#define	MAX_COUNT	((count_t) -1)
-#define MIN_IOSTATS 128
 #define IOF_LATENCY 1
 
 struct iostats_c {
 	unsigned long flags;
 	struct dm_dev *dev;
-	atomic_t reads;
-	atomic_t writes;
+	atomic_t ios[2];
 
 	/*
 	 * These fields are only present if we are recording the
-	 * average completion times.
+	 * io latency.
 	 */
 	spinlock_t lock;
-	jiffies_t total_r;
-	jiffies_t total_w;
-	count_t count_r;
-	count_t count_w;
+	struct timespec start[2];
+	struct timespec end[2];
 };
 
-
-static inline struct dm_iostats *alloc_io(void)
-{
-	return mempool_alloc(_iostats_pool, GFP_NOIO);
-}
-
-static inline void free_io(struct dm_iostats *io)
-{
-	mempool_free(io, _iostats_pool);
-}
-
 /*
- * Construct a io status mapping: <dev_path> <get averages y/n>
+ * Construct a io status mapping: <dev_path> <record latency y/n>
  */
 static int iostats_ctr(struct dm_target *ti, int argc, char **argv)
 {
 	size_t size;
-	int averaging;
+	int latency;
 	struct iostats_c *ic;
 
 	if (argc != 2) {
@@ -83,9 +51,9 @@
 		return -EINVAL;
 	}
 
-	averaging = (toupper(argv[1][0]) == 'Y');
+	latency = (toupper(argv[1][0]) == 'Y');
 
-	if (averaging)
+	if (latency)
 		size = sizeof(*ic);
 	else
 		size = (size_t) &((struct iostats_c *) NULL)->lock;
@@ -98,7 +66,7 @@
 	}
 
 	memset(ic, 0, size);
-	if (averaging) {
+	if (latency) {
 		set_bit(IOF_LATENCY, &ic->flags);
 		ic->lock = SPIN_LOCK_UNLOCKED;
 	}
@@ -109,8 +77,8 @@
 		kfree(ic);
 		return -ENXIO;
 	}
-	atomic_set(&ic->reads, 0);
-	atomic_set(&ic->writes, 0);
+	atomic_set(&ic->ios[READ], 0);
+	atomic_set(&ic->ios[WRITE], 0);
 
 	return 0;
 }
@@ -122,122 +90,112 @@
 	kfree(ic);
 }
 
-/*
- * average latency stats end_io function */
-static void iostats_avg_end_io(struct buffer_head *bh, int uptodate)
+static void timespec_sum(struct timespec *lhs, struct timespec *rhs)
 {
-	int flags;
-	jiffies_t j;
-	struct dm_iostats *io = bh->b_private;
-	struct iostats_c *ic = io->ti->private;
-
-	spin_lock_irqsave(&ic->lock, flags);
-
-	/* average io latency; ignore an io on jiffies overflow */
-	if (jiffies >= io->start_io) {
-		j = jiffies - io->start_io;
+	lhs->tv_sec += rhs->tv_sec;
+	lhs->tv_nsec += rhs->tv_nsec;
 
-		if (io->rw == WRITE) {
-			ic->total_w += j;
-			ic->count_w++;
-			if (ic->count_w > MAX_COUNT) {
-				ic->total_w /= ic->count_w;
-				ic->count_w = 1;
-			}
-		} else {
-			ic->total_r += j;
-			ic->count_r++;
-			if (ic->count_r > MAX_COUNT) {
-				ic->total_r /= ic->count_r;
-				ic->count_r = 1;
-			}
-		}
+	if (lhs->tv_nsec > 1000000000L) {
+		lhs->tv_sec++;
+		lhs->tv_nsec -= 1000000000L;
 	}
-
-	spin_unlock_irqrestore(&ic->lock, flags);
-
-	bh->b_end_io = io->end_io;
-	bh->b_private = io->context;
-
-	free_io(io);
-
-	bh->b_end_io(bh, uptodate);
 }
 
-/*
- * Hook the io so we can work get the io completion time.
- */
-static int _hook_io(struct iostats_c *stats, struct dm_target *ti,
-		    struct buffer_head *bh, int rw)
+static void reset_latency(struct iostats_c *ic, int rw)
 {
-	struct dm_iostats *io;
-
-	io = alloc_io();
-	if (!io)
-		return -ENOMEM;
-
-	io->ti = ti;
-	io->rw = rw;
-	io->start_io = jiffies;
-
-	/* hook the end io request fn */
-	io->end_io = bh->b_end_io;
-	io->context = bh->b_private;
-	bh->b_end_io = iostats_avg_end_io;
-	bh->b_private = io;
+	int flags;
 
-	return 0;
+	spin_lock_irqsave(&ic->lock, flags);
+	memset(&ic->start[rw], 0, sizeof(struct timespec));
+	memset(&ic->end[rw], 0, sizeof(struct timespec));
+	spin_unlock_irqrestore(&ic->lock, flags);
 }
 
 /* read/write statistics mapping */
 static int iostats_map(struct dm_target *ti, struct buffer_head *bh, int rw)
 {
+	int flags;
+	struct timespec now;
 	struct iostats_c *ic = (struct iostats_c *) ti->private;
 
-	/* FIXME: what do we do when these counters wrap ? */
-	if (rw == WRITE)
-		atomic_inc(&ic->writes);
+	if (!test_bit(IOF_LATENCY, &ic->flags))
+		atomic_inc(&ic->ios[rw]);
 
-	else
-		atomic_inc(&ic->reads);
+	else {
+		if (atomic_inc_and_test(&ic->ios[rw]))
+			reset_latency(ic, rw);
 
-	if (test_bit(IOF_LATENCY, &ic->flags))
-		_hook_io(ic, ti, bh, rw);
+		jiffies_to_timespec(jiffies, &now);
+		spin_lock_irqsave(&ic->lock, flags);
+		timespec_sum(&ic->start[rw], &now);
+		spin_unlock_irqrestore(&ic->lock, flags);
+	}
 
 	/* map */
 	bh->b_rdev = ic->dev->dev;
-
 	return 1;
 }
 
-static inline uint _calc(jiffies_t j, count_t c)
+/*
+ * If we're measuring io latency, this function adds on the time
+ * that the io completed.
+ */
+static void iostats_end_io(struct dm_target *ti, struct buffer_head *bh,
+			   int rw, int error)
 {
-	const jiffies_t m = 1000 / HZ;
+	int flags;
+	struct timespec now;
+	struct iostats_c *ic = ti->private;
 
-	return (c ? (j * m / c) : (j * m));
+	if (test_bit(IOF_LATENCY, &ic->flags)) {
+		jiffies_to_timespec(jiffies, &now);
+
+		spin_lock_irqsave(&ic->lock, flags);
+		timespec_sum(&ic->end[rw], &now);
+		spin_unlock_irqrestore(&ic->lock, flags);
+	}
+}
+
+/*
+ * Converts a timespec to milliseconds
+ */
+static inline unsigned long to_milli(struct timespec *ts)
+{
+	return (ts->tv_sec * 1000) + (ts->tv_nsec / 1000000);
+}
+
+/*
+ * Calculates the average latency in milliseconds.
+ */
+static unsigned long calc_latency(struct iostats_c *ic, int rw)
+{
+	unsigned long start_total = to_milli(&ic->start[rw]);
+	unsigned long end_total = to_milli(&ic->end[rw]);
+
+	return (end_total - start_total) /
+		(unsigned long) atomic_read(&ic->ios[rw]);
 }
 
-/* read/write statistics mapping */
 static int iostats_status(struct dm_target *ti, status_type_t type,
-			 char *result, int maxlen)
+			  char *result, int maxlen)
 {
 	struct iostats_c *ic = (struct iostats_c *) ti->private;
 
 	switch (type) {
 	case STATUSTYPE_INFO:
-		if (test_bit(IOF_LATENCY, &ic->flags)) {
+		if (!test_bit(IOF_LATENCY, &ic->flags)) {
 			snprintf(result, maxlen, "%s %u %u",
 				 kdevname(to_kdev_t(ic->dev->bdev->bd_dev)),
-				 atomic_read(&ic->reads), atomic_read(&ic->writes));
-		} else {
-			uint avg_r, avg_w;
+				 atomic_read(&ic->ios[READ]),
+				 atomic_read(&ic->ios[WRITE]));
 
-			avg_r = _calc(ic->total_r, ic->count_r);
-			avg_w = _calc(ic->total_w, ic->count_w);
-			snprintf(result, maxlen, "%s %u %u %u %u",
+		} else {
+			snprintf(result, maxlen, "%s %u %u %lu %lu",
 				 kdevname(to_kdev_t(ic->dev->bdev->bd_dev)),
-				 atomic_read(&ic->reads), atomic_read(&ic->writes),
-				 avg_r, avg_w);
+				 atomic_read(&ic->ios[READ]),
+				 atomic_read(&ic->ios[WRITE]),
+				 calc_latency(ic, READ),
+				 calc_latency(ic, WRITE));
 		}
 		break;
 
@@ -256,37 +214,13 @@
 	.ctr    = iostats_ctr,
 	.dtr    = iostats_dtr,
 	.map    = iostats_map,
+	.end_io = iostats_end_io,
 	.status = iostats_status,
 };
 
 int __init dm_iostats_init(void)
 {
-	int r;
-
-	/* allocate a slab for the dm_iostats */
-	_iostats_cache = kmem_cache_create("dm iostats io",
-					    sizeof(struct dm_iostats), 0, 0,
-					    NULL, NULL);
-
-	if (!_iostats_cache)
-		return -ENOMEM;
-
-	/* Create _iostats_pool mempool */
-	_iostats_pool = mempool_create(MIN_IOSTATS, mempool_alloc_slab,
-				       mempool_free_slab, _iostats_cache);
-
-	if (!_iostats_pool) {
-		kmem_cache_destroy(_iostats_cache);
-		return -ENOMEM;
-	}
-
-	r = dm_register_target(&_iostats_target);
-	if (r) {
-		mempool_destroy(_iostats_pool);
-		kmem_cache_destroy(_iostats_cache);
-	}
-
-	return 0;
+	return dm_register_target(&_iostats_target);
 }
 
 void __exit dm_iostats_exit(void)
@@ -294,16 +228,6 @@
 	int r = dm_unregister_target(&_iostats_target);
 	if (r)
 		DMERR("dm-iostats unregister failed %d", r);
-
-	if (_iostats_pool) {
-		mempool_destroy(_iostats_pool);
-		_iostats_pool = NULL;
-	}
-
-	if (_iostats_cache) {
-		kmem_cache_destroy(_iostats_cache);
-		_iostats_cache = NULL;
-	}
 }
 
 /*
--- diff/drivers/md/dm.c	2002-12-12 17:22:27.000000000 +0000
+++ source/drivers/md/dm.c	2002-12-16 15:34:33.000000000 +0000
@@ -29,6 +29,7 @@
 	struct mapped_device *md;
 
 	struct dm_target *ti;
+	int rw;
 	void (*end_io) (struct buffer_head * bh, int uptodate);
 	void *context;
 };
@@ -321,7 +322,8 @@
 		wake_up(&io->md->wait);
 
 		if (io->ti->type->end_io)
-			io->ti->type->end_io(io->ti, bh, uptodate ? 0 : -EIO);
+			io->ti->type->end_io(io->ti, bh, io->rw,
+					     uptodate ? 0 : -EIO);
 	}
 
 	bh->b_end_io = io->end_io;
@@ -352,6 +354,7 @@
 		atomic_inc(&md->pending);
 		io->md = md;
 		io->ti = ti;
+		io->rw = rw;
 		io->end_io = bh->b_end_io;
 		io->context = bh->b_private;
 		bh->b_end_io = dec_pending;
--- diff/include/linux/device-mapper.h	2002-12-12 17:17:48.000000000 +0000
+++ source/include/linux/device-mapper.h	2002-12-16 15:33:29.000000000 +0000
@@ -34,8 +34,8 @@
  * > 0: simple remap complete
  */
 typedef int (*dm_map_fn) (struct dm_target *ti, struct buffer_head *bh, int rw);
-typedef int (*dm_endio_fn) (struct dm_target *ti,
-			    struct buffer_head *bh, int error);
+typedef void (*dm_endio_fn) (struct dm_target *ti,
+			     struct buffer_head *bh, int rw, int error);
 typedef int (*dm_status_fn) (struct dm_target *ti, status_type_t status_type,
 			     char *result, int maxlen);