LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH 0/7] ocfs2: Extended slot map
@ 2008-03-05 22:51 Joel Becker
  2008-03-05 22:51 ` [PATCH 1/7] ocfs2: Move slot map access into slot_map.c Joel Becker
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: Joel Becker @ 2008-03-05 22:51 UTC (permalink / raw)
  To: ocfs2-devel; +Cc: linux-kernel, linux-fsdevel

ocfs2 has a system file called "slot_map".  A "slot" is a collection of
files local to a particular mounted node, including the journal and
allocators that node is using.  The slot map converts the slot number to
a node number, so when a node dies, ocfs2 knows which slot to recover.

The old ocfs2 slot map is very limited.  It has a physical maximum of
254 entries - specifically, it must fit within one disk block.  It only
allows node numbers up to 254, and cannot be extended past INT16_MAX
(32767).  This is a problem in the world of userspace cluster stacks,
where the node numbers are often sparse and can be up to UINT32_MAX.

It also has the structural problem that empty slots are signified by a
magic number.  That number happens to be -1 (0xFFFF).  It makes for code
that isn't as obvious as one would like.

Thus, we introduce a new slot map format, referred to hence as the
"extended slot map".  The extended slot map is allocated as regular file
space, and so is bound by i_size.  The new format adds a "valid" field,
distinct from the node number.  Finally, it has room for extension
should it be needed.

The kernel code is available on the 'new-slot-map' branch of my git
repository.

View:
http://oss.oracle.com/git/?p=jlbec/linux-2.6.git;a=shortlog;h=new-slot-map
Pull:
git pull git://oss.oracle.com/git/jlbec/linux-2.6.git new-slot-map

The tools code is also available via git, in the 'new-slot-map' branch
as well.

View:
http://oss.oracle.com/git/?p=ocfs2-tools.git;a=shortlog;h=new-slot-map
Pull:
git pull git://oss.oracle.com/git/ocfs2-tools.git new-slot-map




^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/7] ocfs2: Move slot map access into slot_map.c
  2008-03-05 22:51 [PATCH 0/7] ocfs2: Extended slot map Joel Becker
@ 2008-03-05 22:51 ` Joel Becker
  2008-03-05 22:51 ` [PATCH 2/7] ocfs2: Make ocfs2_slot_info private Joel Becker
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Joel Becker @ 2008-03-05 22:51 UTC (permalink / raw)
  To: ocfs2-devel; +Cc: linux-kernel, linux-fsdevel, Mark Fasheh

From: Mark Fasheh <mark.fasheh@oracle.com>

journal.c and dlmglue.c would refresh the slot map by hand.  Instead, have
the update and clear functions do the work inside slot_map.c.  The eventual
result is to have ocfs2_slot_info defined privately in slot_map.c.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlmglue.c  |    8 +-----
 fs/ocfs2/journal.c  |    3 +-
 fs/ocfs2/slot_map.c |   62 +++++++++++++++++++++++++++++++++++++++-----------
 fs/ocfs2/slot_map.h |   11 +++-----
 fs/ocfs2/super.c    |    3 +-
 5 files changed, 55 insertions(+), 32 deletions(-)

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index f779430..33c8a65 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2132,8 +2132,6 @@ int ocfs2_super_lock(struct ocfs2_super *osb,
 	int status = 0;
 	int level = ex ? LKM_EXMODE : LKM_PRMODE;
 	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
-	struct buffer_head *bh;
-	struct ocfs2_slot_info *si = osb->slot_info;
 
 	mlog_entry_void();
 
@@ -2159,11 +2157,7 @@ int ocfs2_super_lock(struct ocfs2_super *osb,
 		goto bail;
 	}
 	if (status) {
-		bh = si->si_bh;
-		status = ocfs2_read_block(osb, bh->b_blocknr, &bh, 0,
-					  si->si_inode);
-		if (status == 0)
-			ocfs2_update_slot_info(si);
+		status = ocfs2_refresh_slot_info(osb);
 
 		ocfs2_complete_lock_res_refresh(lockres, status);
 
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index f31c7e8..c2e654e 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1123,8 +1123,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
 
 	/* Likewise, this would be a strange but ultimately not so
 	 * harmful place to get an error... */
-	ocfs2_clear_slot(si, slot_num);
-	status = ocfs2_update_disk_slots(osb, si);
+	status = ocfs2_clear_slot(osb, slot_num);
 	if (status < 0)
 		mlog_errno(status);
 
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index 3a50ce5..f5727b8 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -49,7 +49,7 @@ static void __ocfs2_fill_slot(struct ocfs2_slot_info *si,
 			      s16 node_num);
 
 /* post the slot information on disk into our slot_info struct. */
-void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
+static void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
 {
 	int i;
 	__le16 *disk_info;
@@ -65,10 +65,27 @@ void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
 	spin_unlock(&si->si_lock);
 }
 
+int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
+{
+	int ret;
+	struct ocfs2_slot_info *si = osb->slot_info;
+	struct buffer_head *bh;
+
+	if (si == NULL)
+		return 0;
+
+	bh = si->si_bh;
+	ret = ocfs2_read_block(osb, bh->b_blocknr, &bh, 0, si->si_inode);
+	if (ret == 0)
+		ocfs2_update_slot_info(si);
+
+	return ret;
+}
+
 /* post the our slot info stuff into it's destination bh and write it
  * out. */
-int ocfs2_update_disk_slots(struct ocfs2_super *osb,
-			    struct ocfs2_slot_info *si)
+static int ocfs2_update_disk_slots(struct ocfs2_super *osb,
+				   struct ocfs2_slot_info *si)
 {
 	int status, i;
 	__le16 *disk_info = (__le16 *) si->si_bh->b_data;
@@ -135,6 +152,19 @@ s16 ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
 	return ret;
 }
 
+static void __ocfs2_free_slot_info(struct ocfs2_slot_info *si)
+{
+	if (si == NULL)
+		return;
+
+	if (si->si_inode)
+		iput(si->si_inode);
+	if (si->si_bh)
+		brelse(si->si_bh);
+
+	kfree(si);
+}
+
 static void __ocfs2_fill_slot(struct ocfs2_slot_info *si,
 			      s16 slot_num,
 			      s16 node_num)
@@ -147,12 +177,18 @@ static void __ocfs2_fill_slot(struct ocfs2_slot_info *si,
 	si->si_global_node_nums[slot_num] = node_num;
 }
 
-void ocfs2_clear_slot(struct ocfs2_slot_info *si,
-		      s16 slot_num)
+int ocfs2_clear_slot(struct ocfs2_super *osb, s16 slot_num)
 {
+	struct ocfs2_slot_info *si = osb->slot_info;
+
+	if (si == NULL)
+		return 0;
+
 	spin_lock(&si->si_lock);
 	__ocfs2_fill_slot(si, slot_num, OCFS2_INVALID_SLOT);
 	spin_unlock(&si->si_lock);
+
+	return ocfs2_update_disk_slots(osb, osb->slot_info);
 }
 
 int ocfs2_init_slot_info(struct ocfs2_super *osb)
@@ -202,18 +238,17 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb)
 	osb->slot_info = si;
 bail:
 	if (status < 0 && si)
-		ocfs2_free_slot_info(si);
+		__ocfs2_free_slot_info(si);
 
 	return status;
 }
 
-void ocfs2_free_slot_info(struct ocfs2_slot_info *si)
+void ocfs2_free_slot_info(struct ocfs2_super *osb)
 {
-	if (si->si_inode)
-		iput(si->si_inode);
-	if (si->si_bh)
-		brelse(si->si_bh);
-	kfree(si);
+	struct ocfs2_slot_info *si = osb->slot_info;
+
+	osb->slot_info = NULL;
+	__ocfs2_free_slot_info(si);
 }
 
 int ocfs2_find_slot(struct ocfs2_super *osb)
@@ -285,7 +320,6 @@ void ocfs2_put_slot(struct ocfs2_super *osb)
 	}
 
 bail:
-	osb->slot_info = NULL;
-	ocfs2_free_slot_info(si);
+	ocfs2_free_slot_info(osb);
 }
 
diff --git a/fs/ocfs2/slot_map.h b/fs/ocfs2/slot_map.h
index 1025872..b029ffd 100644
--- a/fs/ocfs2/slot_map.h
+++ b/fs/ocfs2/slot_map.h
@@ -30,7 +30,7 @@
 struct ocfs2_slot_info {
 	spinlock_t si_lock;
 
-       	struct inode *si_inode;
+	struct inode *si_inode;
 	struct buffer_head *si_bh;
 	unsigned int si_num_slots;
 	unsigned int si_size;
@@ -38,19 +38,16 @@ struct ocfs2_slot_info {
 };
 
 int ocfs2_init_slot_info(struct ocfs2_super *osb);
-void ocfs2_free_slot_info(struct ocfs2_slot_info *si);
+void ocfs2_free_slot_info(struct ocfs2_super *osb);
 
 int ocfs2_find_slot(struct ocfs2_super *osb);
 void ocfs2_put_slot(struct ocfs2_super *osb);
 
-void ocfs2_update_slot_info(struct ocfs2_slot_info *si);
-int ocfs2_update_disk_slots(struct ocfs2_super *osb,
-			    struct ocfs2_slot_info *si);
+int ocfs2_refresh_slot_info(struct ocfs2_super *osb);
 
 s16 ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
 			   s16 global);
-void ocfs2_clear_slot(struct ocfs2_slot_info *si,
-		      s16 slot_num);
+int ocfs2_clear_slot(struct ocfs2_super *osb, s16 slot_num);
 
 static inline int ocfs2_is_empty_slot(struct ocfs2_slot_info *si,
 				      int slot_num)
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index bec75af..fad37af 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1724,8 +1724,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
 
 	/* This function assumes that the caller has the main osb resource */
 
-	if (osb->slot_info)
-		ocfs2_free_slot_info(osb->slot_info);
+	ocfs2_free_slot_info(osb);
 
 	kfree(osb->osb_orphan_wipes);
 	/* FIXME
-- 
1.5.3.8


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/7] ocfs2: Make ocfs2_slot_info private.
  2008-03-05 22:51 [PATCH 0/7] ocfs2: Extended slot map Joel Becker
  2008-03-05 22:51 ` [PATCH 1/7] ocfs2: Move slot map access into slot_map.c Joel Becker
@ 2008-03-05 22:51 ` Joel Becker
  2008-03-05 22:52 ` [PATCH 3/7] ocfs2: Change the recovery map to an array of node numbers Joel Becker
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Joel Becker @ 2008-03-05 22:51 UTC (permalink / raw)
  To: ocfs2-devel; +Cc: linux-kernel, linux-fsdevel

Just use osb_lock around the ocfs2_slot_info data.  This allows us to
take the ocfs2_slot_info structure private in slot_map.c.  All access
is now via accessors.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/journal.c  |   24 +++++++-------
 fs/ocfs2/ocfs2.h    |    1 +
 fs/ocfs2/slot_map.c |   81 ++++++++++++++++++++++++++++++++++++---------------
 fs/ocfs2/slot_map.h |   25 ++-------------
 4 files changed, 74 insertions(+), 57 deletions(-)

diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index c2e654e..ed0c6d0 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1079,7 +1079,6 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
 {
 	int status = 0;
 	int slot_num;
-	struct ocfs2_slot_info *si = osb->slot_info;
 	struct ocfs2_dinode *la_copy = NULL;
 	struct ocfs2_dinode *tl_copy = NULL;
 
@@ -1092,8 +1091,8 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
 	 * case we should've called ocfs2_journal_load instead. */
 	BUG_ON(osb->node_num == node_num);
 
-	slot_num = ocfs2_node_num_to_slot(si, node_num);
-	if (slot_num == OCFS2_INVALID_SLOT) {
+	slot_num = ocfs2_node_num_to_slot(osb, node_num);
+	if (slot_num == -ENOENT) {
 		status = 0;
 		mlog(0, "no slot for this node, so no recovery required.\n");
 		goto done;
@@ -1183,23 +1182,24 @@ bail:
  * slot info struct has been updated from disk. */
 int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
 {
-	int status, i, node_num;
-	struct ocfs2_slot_info *si = osb->slot_info;
+	unsigned int node_num;
+	int status, i;
 
 	/* This is called with the super block cluster lock, so we
 	 * know that the slot map can't change underneath us. */
 
-	spin_lock(&si->si_lock);
-	for(i = 0; i < si->si_num_slots; i++) {
+	spin_lock(&osb->osb_lock);
+	for (i = 0; i < osb->max_slots; i++) {
 		if (i == osb->slot_num)
 			continue;
-		if (ocfs2_is_empty_slot(si, i))
+
+		status = ocfs2_slot_to_node_num_locked(osb, i, &node_num);
+		if (status == -ENOENT)
 			continue;
 
-		node_num = si->si_global_node_nums[i];
 		if (ocfs2_node_map_test_bit(osb, &osb->recovery_map, node_num))
 			continue;
-		spin_unlock(&si->si_lock);
+		spin_unlock(&osb->osb_lock);
 
 		/* Ok, we have a slot occupied by another node which
 		 * is not in the recovery map. We trylock his journal
@@ -1215,9 +1215,9 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
 			goto bail;
 		}
 
-		spin_lock(&si->si_lock);
+		spin_lock(&osb->osb_lock);
 	}
-	spin_unlock(&si->si_lock);
+	spin_unlock(&osb->osb_lock);
 
 	status = 0;
 bail:
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 6546cef..ee3f675 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -179,6 +179,7 @@ enum ocfs2_mount_options
 #define OCFS2_DEFAULT_ATIME_QUANTUM	60
 
 struct ocfs2_journal;
+struct ocfs2_slot_info;
 struct ocfs2_super
 {
 	struct task_struct *commit_task;
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index f5727b8..762360d 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -42,13 +42,25 @@
 
 #include "buffer_head_io.h"
 
+struct ocfs2_slot_info {
+	struct inode *si_inode;
+	struct buffer_head *si_bh;
+	unsigned int si_num_slots;
+	unsigned int si_size;
+	s16 si_global_node_nums[OCFS2_MAX_SLOTS];
+};
+
+
 static s16 __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
 				    s16 global);
 static void __ocfs2_fill_slot(struct ocfs2_slot_info *si,
 			      s16 slot_num,
 			      s16 node_num);
 
-/* post the slot information on disk into our slot_info struct. */
+/*
+ * Post the slot information on disk into our slot_info struct.
+ * Must be protected by osb_lock.
+ */
 static void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
 {
 	int i;
@@ -56,13 +68,10 @@ static void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
 
 	/* we don't read the slot block here as ocfs2_super_lock
 	 * should've made sure we have the most recent copy. */
-	spin_lock(&si->si_lock);
 	disk_info = (__le16 *) si->si_bh->b_data;
 
 	for (i = 0; i < si->si_size; i++)
 		si->si_global_node_nums[i] = le16_to_cpu(disk_info[i]);
-
-	spin_unlock(&si->si_lock);
 }
 
 int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
@@ -76,8 +85,11 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
 
 	bh = si->si_bh;
 	ret = ocfs2_read_block(osb, bh->b_blocknr, &bh, 0, si->si_inode);
-	if (ret == 0)
+	if (ret == 0) {
+		spin_lock(&osb->osb_lock);
 		ocfs2_update_slot_info(si);
+		spin_unlock(&osb->osb_lock);
+	}
 
 	return ret;
 }
@@ -90,10 +102,10 @@ static int ocfs2_update_disk_slots(struct ocfs2_super *osb,
 	int status, i;
 	__le16 *disk_info = (__le16 *) si->si_bh->b_data;
 
-	spin_lock(&si->si_lock);
+	spin_lock(&osb->osb_lock);
 	for (i = 0; i < si->si_size; i++)
 		disk_info[i] = cpu_to_le16(si->si_global_node_nums[i]);
-	spin_unlock(&si->si_lock);
+	spin_unlock(&osb->osb_lock);
 
 	status = ocfs2_write_block(osb, si->si_bh, si->si_inode);
 	if (status < 0)
@@ -119,7 +131,8 @@ static s16 __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
 	return ret;
 }
 
-static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, s16 preferred)
+static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
+				   s16 preferred)
 {
 	int i;
 	s16 ret = OCFS2_INVALID_SLOT;
@@ -141,15 +154,36 @@ out:
 	return ret;
 }
 
-s16 ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
-			   s16 global)
+int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num)
 {
-	s16 ret;
+	s16 slot;
+	struct ocfs2_slot_info *si = osb->slot_info;
 
-	spin_lock(&si->si_lock);
-	ret = __ocfs2_node_num_to_slot(si, global);
-	spin_unlock(&si->si_lock);
-	return ret;
+	spin_lock(&osb->osb_lock);
+	slot = __ocfs2_node_num_to_slot(si, node_num);
+	spin_unlock(&osb->osb_lock);
+
+	if (slot == OCFS2_INVALID_SLOT)
+		return -ENOENT;
+
+	return slot;
+}
+
+int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num,
+				  unsigned int *node_num)
+{
+	struct ocfs2_slot_info *si = osb->slot_info;
+
+	assert_spin_locked(&osb->osb_lock);
+
+	BUG_ON(slot_num < 0);
+	BUG_ON(slot_num > osb->max_slots);
+
+	if (si->si_global_node_nums[slot_num] == OCFS2_INVALID_SLOT)
+		return -ENOENT;
+
+	*node_num = si->si_global_node_nums[slot_num];
+	return 0;
 }
 
 static void __ocfs2_free_slot_info(struct ocfs2_slot_info *si)
@@ -184,9 +218,9 @@ int ocfs2_clear_slot(struct ocfs2_super *osb, s16 slot_num)
 	if (si == NULL)
 		return 0;
 
-	spin_lock(&si->si_lock);
+	spin_lock(&osb->osb_lock);
 	__ocfs2_fill_slot(si, slot_num, OCFS2_INVALID_SLOT);
-	spin_unlock(&si->si_lock);
+	spin_unlock(&osb->osb_lock);
 
 	return ocfs2_update_disk_slots(osb, osb->slot_info);
 }
@@ -206,7 +240,6 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb)
 		goto bail;
 	}
 
-	spin_lock_init(&si->si_lock);
 	si->si_num_slots = osb->max_slots;
 	si->si_size = OCFS2_MAX_SLOTS;
 
@@ -235,7 +268,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb)
 
 	si->si_inode = inode;
 	si->si_bh = bh;
-	osb->slot_info = si;
+	osb->slot_info = (struct ocfs2_slot_info *)si;
 bail:
 	if (status < 0 && si)
 		__ocfs2_free_slot_info(si);
@@ -261,9 +294,9 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
 
 	si = osb->slot_info;
 
+	spin_lock(&osb->osb_lock);
 	ocfs2_update_slot_info(si);
 
-	spin_lock(&si->si_lock);
 	/* search for ourselves first and take the slot if it already
 	 * exists. Perhaps we need to mark this in a variable for our
 	 * own journal recovery? Possibly not, though we certainly
@@ -274,7 +307,7 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
 		 * one. */
 		slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
 		if (slot == OCFS2_INVALID_SLOT) {
-			spin_unlock(&si->si_lock);
+			spin_unlock(&osb->osb_lock);
 			mlog(ML_ERROR, "no free slots available!\n");
 			status = -EINVAL;
 			goto bail;
@@ -285,7 +318,7 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
 
 	__ocfs2_fill_slot(si, slot, osb->node_num);
 	osb->slot_num = slot;
-	spin_unlock(&si->si_lock);
+	spin_unlock(&osb->osb_lock);
 
 	mlog(0, "taking node slot %d\n", osb->slot_num);
 
@@ -306,12 +339,12 @@ void ocfs2_put_slot(struct ocfs2_super *osb)
 	if (!si)
 		return;
 
+	spin_lock(&osb->osb_lock);
 	ocfs2_update_slot_info(si);
 
-	spin_lock(&si->si_lock);
 	__ocfs2_fill_slot(si, osb->slot_num, OCFS2_INVALID_SLOT);
 	osb->slot_num = OCFS2_INVALID_SLOT;
-	spin_unlock(&si->si_lock);
+	spin_unlock(&osb->osb_lock);
 
 	status = ocfs2_update_disk_slots(osb, si);
 	if (status < 0) {
diff --git a/fs/ocfs2/slot_map.h b/fs/ocfs2/slot_map.h
index b029ffd..5118e89 100644
--- a/fs/ocfs2/slot_map.h
+++ b/fs/ocfs2/slot_map.h
@@ -27,16 +27,6 @@
 #ifndef SLOTMAP_H
 #define SLOTMAP_H
 
-struct ocfs2_slot_info {
-	spinlock_t si_lock;
-
-	struct inode *si_inode;
-	struct buffer_head *si_bh;
-	unsigned int si_num_slots;
-	unsigned int si_size;
-	s16 si_global_node_nums[OCFS2_MAX_SLOTS];
-};
-
 int ocfs2_init_slot_info(struct ocfs2_super *osb);
 void ocfs2_free_slot_info(struct ocfs2_super *osb);
 
@@ -45,17 +35,10 @@ void ocfs2_put_slot(struct ocfs2_super *osb);
 
 int ocfs2_refresh_slot_info(struct ocfs2_super *osb);
 
-s16 ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
-			   s16 global);
-int ocfs2_clear_slot(struct ocfs2_super *osb, s16 slot_num);
+int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num);
+int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num,
+				  unsigned int *node_num);
 
-static inline int ocfs2_is_empty_slot(struct ocfs2_slot_info *si,
-				      int slot_num)
-{
-	BUG_ON(slot_num == OCFS2_INVALID_SLOT);
-	assert_spin_locked(&si->si_lock);
-
-	return si->si_global_node_nums[slot_num] == OCFS2_INVALID_SLOT;
-}
+int ocfs2_clear_slot(struct ocfs2_super *osb, s16 slot_num);
 
 #endif
-- 
1.5.3.8


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 3/7] ocfs2: Change the recovery map to an array of node numbers.
  2008-03-05 22:51 [PATCH 0/7] ocfs2: Extended slot map Joel Becker
  2008-03-05 22:51 ` [PATCH 1/7] ocfs2: Move slot map access into slot_map.c Joel Becker
  2008-03-05 22:51 ` [PATCH 2/7] ocfs2: Make ocfs2_slot_info private Joel Becker
@ 2008-03-05 22:52 ` Joel Becker
  2008-03-05 22:52 ` [PATCH 4/7] ocfs2: slot_map I/O based on max_slots Joel Becker
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Joel Becker @ 2008-03-05 22:52 UTC (permalink / raw)
  To: ocfs2-devel; +Cc: linux-kernel, linux-fsdevel

The old recovery map was a bitmap of node numbers.  This was sufficient
for the maximum node number of 254.  Going forward, we want node numbers
to be UINT32.  Thus, we need a new recovery map.

Note that we can't keep track of slots here.  We must write down the
node number to recover *before* we get the locks needed to convert a
node number into a slot number.

The recovery map is now an array of unsigned ints, max_slots in size.
It moves to journal.c with the rest of recovery.

Because it needs to be initialized, we move all of recovery initialization
into a new function, ocfs2_recovery_init().  This actually cleans up
ocfs2_initialize_super() a little as well.  Following on, recovery cleanup
becomes part of ocfs2_recovery_exit().

A number of node map functions are rendered obsolete and are removed.

Finally, waiting on recovery is wrapped in a function rather than naked
checks on the recovery_event.  This is a cleanup from Mark.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlmglue.c   |    6 +-
 fs/ocfs2/heartbeat.c |  111 ------------------------------
 fs/ocfs2/heartbeat.h |   14 ----
 fs/ocfs2/journal.c   |  181 +++++++++++++++++++++++++++++++++++++++++++++----
 fs/ocfs2/journal.h   |    4 +
 fs/ocfs2/ocfs2.h     |    3 +-
 fs/ocfs2/super.c     |   33 ++-------
 7 files changed, 182 insertions(+), 170 deletions(-)

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 33c8a65..b4108fe 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1950,8 +1950,7 @@ int ocfs2_inode_lock_full(struct inode *inode,
 		goto local;
 
 	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
-		wait_event(osb->recovery_event,
-			   ocfs2_node_map_is_empty(osb, &osb->recovery_map));
+		ocfs2_wait_for_recovery(osb);
 
 	lockres = &OCFS2_I(inode)->ip_inode_lockres;
 	level = ex ? LKM_EXMODE : LKM_PRMODE;
@@ -1974,8 +1973,7 @@ int ocfs2_inode_lock_full(struct inode *inode,
 	 * committed to owning this lock so we don't allow signals to
 	 * abort the operation. */
 	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
-		wait_event(osb->recovery_event,
-			   ocfs2_node_map_is_empty(osb, &osb->recovery_map));
+		ocfs2_wait_for_recovery(osb);
 
 local:
 	/*
diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c
index 0758daf..80de239 100644
--- a/fs/ocfs2/heartbeat.c
+++ b/fs/ocfs2/heartbeat.c
@@ -48,7 +48,6 @@ static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map,
 					    int bit);
 static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map,
 					      int bit);
-static inline int __ocfs2_node_map_is_empty(struct ocfs2_node_map *map);
 
 /* special case -1 for now
  * TODO: should *really* make sure the calling func never passes -1!!  */
@@ -62,7 +61,6 @@ static void ocfs2_node_map_init(struct ocfs2_node_map *map)
 void ocfs2_init_node_maps(struct ocfs2_super *osb)
 {
 	spin_lock_init(&osb->node_map_lock);
-	ocfs2_node_map_init(&osb->recovery_map);
 	ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs);
 }
 
@@ -192,112 +190,3 @@ int ocfs2_node_map_test_bit(struct ocfs2_super *osb,
 	return ret;
 }
 
-static inline int __ocfs2_node_map_is_empty(struct ocfs2_node_map *map)
-{
-	int bit;
-	bit = find_next_bit(map->map, map->num_nodes, 0);
-	if (bit < map->num_nodes)
-		return 0;
-	return 1;
-}
-
-int ocfs2_node_map_is_empty(struct ocfs2_super *osb,
-			    struct ocfs2_node_map *map)
-{
-	int ret;
-	BUG_ON(map->num_nodes == 0);
-	spin_lock(&osb->node_map_lock);
-	ret = __ocfs2_node_map_is_empty(map);
-	spin_unlock(&osb->node_map_lock);
-	return ret;
-}
-
-#if 0
-
-static void __ocfs2_node_map_dup(struct ocfs2_node_map *target,
-				 struct ocfs2_node_map *from)
-{
-	BUG_ON(from->num_nodes == 0);
-	ocfs2_node_map_init(target);
-	__ocfs2_node_map_set(target, from);
-}
-
-/* returns 1 if bit is the only bit set in target, 0 otherwise */
-int ocfs2_node_map_is_only(struct ocfs2_super *osb,
-			   struct ocfs2_node_map *target,
-			   int bit)
-{
-	struct ocfs2_node_map temp;
-	int ret;
-
-	spin_lock(&osb->node_map_lock);
-	__ocfs2_node_map_dup(&temp, target);
-	__ocfs2_node_map_clear_bit(&temp, bit);
-	ret = __ocfs2_node_map_is_empty(&temp);
-	spin_unlock(&osb->node_map_lock);
-
-	return ret;
-}
-
-static void __ocfs2_node_map_set(struct ocfs2_node_map *target,
-				 struct ocfs2_node_map *from)
-{
-	int num_longs, i;
-
-	BUG_ON(target->num_nodes != from->num_nodes);
-	BUG_ON(target->num_nodes == 0);
-
-	num_longs = BITS_TO_LONGS(target->num_nodes);
-	for (i = 0; i < num_longs; i++)
-		target->map[i] = from->map[i];
-}
-
-#endif  /*  0  */
-
-/* Returns whether the recovery bit was actually set - it may not be
- * if a node is still marked as needing recovery */
-int ocfs2_recovery_map_set(struct ocfs2_super *osb,
-			   int num)
-{
-	int set = 0;
-
-	spin_lock(&osb->node_map_lock);
-
-	if (!test_bit(num, osb->recovery_map.map)) {
-	    __ocfs2_node_map_set_bit(&osb->recovery_map, num);
-	    set = 1;
-	}
-
-	spin_unlock(&osb->node_map_lock);
-
-	return set;
-}
-
-void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
-			      int num)
-{
-	ocfs2_node_map_clear_bit(osb, &osb->recovery_map, num);
-}
-
-int ocfs2_node_map_iterate(struct ocfs2_super *osb,
-			   struct ocfs2_node_map *map,
-			   int idx)
-{
-	int i = idx;
-
-	idx = O2NM_INVALID_NODE_NUM;
-	spin_lock(&osb->node_map_lock);
-	if ((i != O2NM_INVALID_NODE_NUM) &&
-	    (i >= 0) &&
-	    (i < map->num_nodes)) {
-		while(i < map->num_nodes) {
-			if (test_bit(i, map->map)) {
-				idx = i;
-				break;
-			}
-			i++;
-		}
-	}
-	spin_unlock(&osb->node_map_lock);
-	return idx;
-}
diff --git a/fs/ocfs2/heartbeat.h b/fs/ocfs2/heartbeat.h
index eac63ae..98d8ffc 100644
--- a/fs/ocfs2/heartbeat.h
+++ b/fs/ocfs2/heartbeat.h
@@ -33,8 +33,6 @@ void ocfs2_stop_heartbeat(struct ocfs2_super *osb);
 
 /* node map functions - used to keep track of mounted and in-recovery
  * nodes. */
-int ocfs2_node_map_is_empty(struct ocfs2_super *osb,
-			    struct ocfs2_node_map *map);
 void ocfs2_node_map_set_bit(struct ocfs2_super *osb,
 			    struct ocfs2_node_map *map,
 			    int bit);
@@ -44,17 +42,5 @@ void ocfs2_node_map_clear_bit(struct ocfs2_super *osb,
 int ocfs2_node_map_test_bit(struct ocfs2_super *osb,
 			    struct ocfs2_node_map *map,
 			    int bit);
-int ocfs2_node_map_iterate(struct ocfs2_super *osb,
-			   struct ocfs2_node_map *map,
-			   int idx);
-static inline int ocfs2_node_map_first_set_bit(struct ocfs2_super *osb,
-					       struct ocfs2_node_map *map)
-{
-	return ocfs2_node_map_iterate(osb, map, 0);
-}
-int ocfs2_recovery_map_set(struct ocfs2_super *osb,
-			   int num);
-void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
-			      int num);
 
 #endif /* OCFS2_HEARTBEAT_H */
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index ed0c6d0..fe679f3 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -64,6 +64,137 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
 				 int slot);
 static int ocfs2_commit_thread(void *arg);
 
+
+/*
+ * The recovery_list is a simple linked list of node numbers to recover.
+ * It is protected by the recovery_lock.
+ */
+
+struct ocfs2_recovery_map {
+	int rm_used;
+	unsigned int *rm_entries;
+};
+
+int ocfs2_recovery_init(struct ocfs2_super *osb)
+{
+	struct ocfs2_recovery_map *rm;
+
+	mutex_init(&osb->recovery_lock);
+	osb->disable_recovery = 0;
+	osb->recovery_thread_task = NULL;
+	init_waitqueue_head(&osb->recovery_event);
+
+	rm = kzalloc(sizeof(struct ocfs2_recovery_map) +
+		     osb->max_slots * sizeof(unsigned int),
+		     GFP_KERNEL);
+	if (!rm) {
+		mlog_errno(-ENOMEM);
+		return -ENOMEM;
+	}
+
+	rm->rm_entries = (unsigned int *)((char *)rm +
+					  sizeof(struct ocfs2_recovery_map));
+	osb->recovery_map = rm;
+
+	return 0;
+}
+
+/* we can't grab the goofy sem lock from inside wait_event, so we use
+ * memory barriers to make sure that we'll see the null task before
+ * being woken up */
+static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
+{
+	mb();
+	return osb->recovery_thread_task != NULL;
+}
+
+void ocfs2_recovery_exit(struct ocfs2_super *osb)
+{
+	struct ocfs2_recovery_map *rm;
+
+	/* disable any new recovery threads and wait for any currently
+	 * running ones to exit. Do this before setting the vol_state. */
+	mutex_lock(&osb->recovery_lock);
+	osb->disable_recovery = 1;
+	mutex_unlock(&osb->recovery_lock);
+	wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
+
+	/* At this point, we know that no more recovery threads can be
+	 * launched, so wait for any recovery completion work to
+	 * complete. */
+	flush_workqueue(ocfs2_wq);
+
+	/*
+	 * Now that recovery is shut down, and the osb is about to be
+	 * freed,  the osb_lock is not taken here.
+	 */
+	rm = osb->recovery_map;
+	/* XXX: Should we bug if there are dirty entries? */
+
+	kfree(rm);
+}
+
+/* Behaves like test-and-set.  Returns the previous value */
+static int __ocfs2_recovery_map_test(struct ocfs2_super *osb,
+				     unsigned int node_num)
+{
+	int i;
+	struct ocfs2_recovery_map *rm = osb->recovery_map;
+
+	assert_spin_locked(&osb->osb_lock);
+
+	for (i = 0; i < rm->rm_used; i++) {
+		if (rm->rm_entries[i] == node_num)
+			return 1;
+	}
+
+	return 0;
+}
+
+static int ocfs2_recovery_map_set(struct ocfs2_super *osb,
+				  unsigned int node_num)
+{
+	struct ocfs2_recovery_map *rm = osb->recovery_map;
+
+	spin_lock(&osb->osb_lock);
+	if (__ocfs2_recovery_map_test(osb, node_num)) {
+		spin_unlock(&osb->osb_lock);
+		return 1;
+	}
+
+	/* XXX: Can this be exploited? Not from o2dlm... */
+	BUG_ON(rm->rm_used >= osb->max_slots);
+
+	rm->rm_entries[rm->rm_used] = node_num;
+	rm->rm_used++;
+	spin_unlock(&osb->osb_lock);
+
+	return 0;
+}
+
+static void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
+				     unsigned int node_num)
+{
+	int i;
+	struct ocfs2_recovery_map *rm = osb->recovery_map;
+
+	spin_lock(&osb->osb_lock);
+
+	for (i = 0; i < rm->rm_used; i++) {
+		if (rm->rm_entries[i] == node_num)
+			break;
+	}
+
+	if (i < rm->rm_used) {
+		/* XXX: be careful with the pointer math */
+		memmove(&(rm->rm_entries[i]), &(rm->rm_entries[i + 1]),
+			(rm->rm_used - i - 1) * sizeof(unsigned int));
+		rm->rm_used--;
+	}
+
+	spin_unlock(&osb->osb_lock);
+}
+
 static int ocfs2_commit_cache(struct ocfs2_super *osb)
 {
 	int status = 0;
@@ -650,6 +781,23 @@ bail:
 	return status;
 }
 
+static int ocfs2_recovery_completed(struct ocfs2_super *osb)
+{
+	int empty;
+	struct ocfs2_recovery_map *rm = osb->recovery_map;
+
+	spin_lock(&osb->osb_lock);
+	empty = (rm->rm_used == 0);
+	spin_unlock(&osb->osb_lock);
+
+	return empty;
+}
+
+void ocfs2_wait_for_recovery(struct ocfs2_super *osb)
+{
+	wait_event(osb->recovery_event, ocfs2_recovery_completed(osb));
+}
+
 /*
  * JBD Might read a cached version of another nodes journal file. We
  * don't want this as this file changes often and we get no
@@ -848,6 +996,7 @@ static int __ocfs2_recovery_thread(void *arg)
 {
 	int status, node_num;
 	struct ocfs2_super *osb = arg;
+	struct ocfs2_recovery_map *rm = osb->recovery_map;
 
 	mlog_entry_void();
 
@@ -863,26 +1012,29 @@ restart:
 		goto bail;
 	}
 
-	while(!ocfs2_node_map_is_empty(osb, &osb->recovery_map)) {
-		node_num = ocfs2_node_map_first_set_bit(osb,
-							&osb->recovery_map);
-		if (node_num == O2NM_INVALID_NODE_NUM) {
-			mlog(0, "Out of nodes to recover.\n");
-			break;
-		}
+	spin_lock(&osb->osb_lock);
+	while (rm->rm_used) {
+		/* It's always safe to remove entry zero, as we won't
+		 * clear it until ocfs2_recover_node() has succeeded. */
+		node_num = rm->rm_entries[0];
+		spin_unlock(&osb->osb_lock);
 
 		status = ocfs2_recover_node(osb, node_num);
-		if (status < 0) {
+		if (!status) {
+			ocfs2_recovery_map_clear(osb, node_num);
+		} else {
 			mlog(ML_ERROR,
 			     "Error %d recovering node %d on device (%u,%u)!\n",
 			     status, node_num,
 			     MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
 			mlog(ML_ERROR, "Volume requires unmount.\n");
-			continue;
 		}
 
-		ocfs2_recovery_map_clear(osb, node_num);
+		spin_lock(&osb->osb_lock);
 	}
+	spin_unlock(&osb->osb_lock);
+	mlog(0, "All nodes recovered\n");
+
 	ocfs2_super_unlock(osb, 1);
 
 	/* We always run recovery on our own orphan dir - the dead
@@ -893,8 +1045,7 @@ restart:
 
 bail:
 	mutex_lock(&osb->recovery_lock);
-	if (!status &&
-	    !ocfs2_node_map_is_empty(osb, &osb->recovery_map)) {
+	if (!status && !ocfs2_recovery_completed(osb)) {
 		mutex_unlock(&osb->recovery_lock);
 		goto restart;
 	}
@@ -924,8 +1075,8 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
 
 	/* People waiting on recovery will wait on
 	 * the recovery map to empty. */
-	if (!ocfs2_recovery_map_set(osb, node_num))
-		mlog(0, "node %d already be in recovery.\n", node_num);
+	if (ocfs2_recovery_map_set(osb, node_num))
+		mlog(0, "node %d already in recovery map.\n", node_num);
 
 	mlog(0, "starting recovery thread...\n");
 
@@ -1197,7 +1348,7 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
 		if (status == -ENOENT)
 			continue;
 
-		if (ocfs2_node_map_test_bit(osb, &osb->recovery_map, node_num))
+		if (__ocfs2_recovery_map_test(osb, node_num))
 			continue;
 		spin_unlock(&osb->osb_lock);
 
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 220f3e8..db82be2 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -134,6 +134,10 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
 
 /* Exported only for the journal struct init code in super.c. Do not call. */
 void ocfs2_complete_recovery(struct work_struct *work);
+void ocfs2_wait_for_recovery(struct ocfs2_super *osb);
+
+int ocfs2_recovery_init(struct ocfs2_super *osb);
+void ocfs2_recovery_exit(struct ocfs2_super *osb);
 
 /*
  *  Journal Control:
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index ee3f675..c6ed8c3 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -180,6 +180,7 @@ enum ocfs2_mount_options
 
 struct ocfs2_journal;
 struct ocfs2_slot_info;
+struct ocfs2_recovery_map;
 struct ocfs2_super
 {
 	struct task_struct *commit_task;
@@ -191,7 +192,6 @@ struct ocfs2_super
 	struct ocfs2_slot_info *slot_info;
 
 	spinlock_t node_map_lock;
-	struct ocfs2_node_map recovery_map;
 
 	u64 root_blkno;
 	u64 system_dir_blkno;
@@ -226,6 +226,7 @@ struct ocfs2_super
 
 	atomic_t vol_state;
 	struct mutex recovery_lock;
+	struct ocfs2_recovery_map *recovery_map;
 	struct task_struct *recovery_thread_task;
 	int disable_recovery;
 	wait_queue_head_t checkpoint_event;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index fad37af..1a4c7c7 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1224,15 +1224,6 @@ leave:
 	return status;
 }
 
-/* we can't grab the goofy sem lock from inside wait_event, so we use
- * memory barriers to make sure that we'll see the null task before
- * being woken up */
-static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
-{
-	mb();
-	return osb->recovery_thread_task != NULL;
-}
-
 static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 {
 	int tmp;
@@ -1249,17 +1240,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 
 	ocfs2_truncate_log_shutdown(osb);
 
-	/* disable any new recovery threads and wait for any currently
-	 * running ones to exit. Do this before setting the vol_state. */
-	mutex_lock(&osb->recovery_lock);
-	osb->disable_recovery = 1;
-	mutex_unlock(&osb->recovery_lock);
-	wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
-
-	/* At this point, we know that no more recovery threads can be
-	 * launched, so wait for any recovery completion work to
-	 * complete. */
-	flush_workqueue(ocfs2_wq);
+	/* This will disable recovery and flush any recovery work. */
+	ocfs2_recovery_exit(osb);
 
 	ocfs2_journal_shutdown(osb);
 
@@ -1368,7 +1350,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	osb->s_sectsize_bits = blksize_bits(sector_size);
 	BUG_ON(!osb->s_sectsize_bits);
 
-	init_waitqueue_head(&osb->recovery_event);
 	spin_lock_init(&osb->dc_task_lock);
 	init_waitqueue_head(&osb->dc_event);
 	osb->dc_work_sequence = 0;
@@ -1388,10 +1369,12 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u",
 		 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
 
-	mutex_init(&osb->recovery_lock);
-
-	osb->disable_recovery = 0;
-	osb->recovery_thread_task = NULL;
+	status = ocfs2_recovery_init(osb);
+	if (status) {
+		mlog(ML_ERROR, "Unable to initialize recovery state\n");
+		mlog_errno(status);
+		goto bail;
+	}
 
 	init_waitqueue_head(&osb->checkpoint_event);
 	atomic_set(&osb->needs_checkpoint, 0);
-- 
1.5.3.8


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 4/7] ocfs2: slot_map I/O based on max_slots.
  2008-03-05 22:51 [PATCH 0/7] ocfs2: Extended slot map Joel Becker
                   ` (2 preceding siblings ...)
  2008-03-05 22:52 ` [PATCH 3/7] ocfs2: Change the recovery map to an array of node numbers Joel Becker
@ 2008-03-05 22:52 ` Joel Becker
  2008-03-05 22:52 ` [PATCH 5/7] ocfs2: De-magic the in-memory slot map Joel Becker
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Joel Becker @ 2008-03-05 22:52 UTC (permalink / raw)
  To: ocfs2-devel; +Cc: linux-kernel, linux-fsdevel

The slot map code assumed a slot_map file has one block allocated.
This changes the code to I/O as many blocks as will cover max_slots.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/slot_map.c |  128 +++++++++++++++++++++++++++++++++++++++++++--------
 1 files changed, 108 insertions(+), 20 deletions(-)

diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index 762360d..5bddee1 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -44,7 +44,8 @@
 
 struct ocfs2_slot_info {
 	struct inode *si_inode;
-	struct buffer_head *si_bh;
+	unsigned int si_blocks;
+	struct buffer_head **si_bh;
 	unsigned int si_num_slots;
 	unsigned int si_size;
 	s16 si_global_node_nums[OCFS2_MAX_SLOTS];
@@ -68,7 +69,7 @@ static void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
 
 	/* we don't read the slot block here as ocfs2_super_lock
 	 * should've made sure we have the most recent copy. */
-	disk_info = (__le16 *) si->si_bh->b_data;
+	disk_info = (__le16 *) si->si_bh[0]->b_data;
 
 	for (i = 0; i < si->si_size; i++)
 		si->si_global_node_nums[i] = le16_to_cpu(disk_info[i]);
@@ -78,13 +79,23 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
 {
 	int ret;
 	struct ocfs2_slot_info *si = osb->slot_info;
-	struct buffer_head *bh;
 
 	if (si == NULL)
 		return 0;
 
-	bh = si->si_bh;
-	ret = ocfs2_read_block(osb, bh->b_blocknr, &bh, 0, si->si_inode);
+	BUG_ON(si->si_blocks == 0);
+	BUG_ON(si->si_bh == NULL);
+
+	mlog(0, "Refreshing slot map, reading %u block(s)\n",
+	     si->si_blocks);
+
+	/*
+	 * We pass -1 as blocknr because we expect all of si->si_bh to
+	 * be !NULL.  Thus, ocfs2_read_blocks() will ignore blocknr.  If
+	 * this is not true, the read of -1 (UINT64_MAX) will fail.
+	 */
+	ret = ocfs2_read_blocks(osb, -1, si->si_blocks, si->si_bh, 0,
+				si->si_inode);
 	if (ret == 0) {
 		spin_lock(&osb->osb_lock);
 		ocfs2_update_slot_info(si);
@@ -100,20 +111,42 @@ static int ocfs2_update_disk_slots(struct ocfs2_super *osb,
 				   struct ocfs2_slot_info *si)
 {
 	int status, i;
-	__le16 *disk_info = (__le16 *) si->si_bh->b_data;
+	__le16 *disk_info = (__le16 *) si->si_bh[0]->b_data;
 
 	spin_lock(&osb->osb_lock);
 	for (i = 0; i < si->si_size; i++)
 		disk_info[i] = cpu_to_le16(si->si_global_node_nums[i]);
 	spin_unlock(&osb->osb_lock);
 
-	status = ocfs2_write_block(osb, si->si_bh, si->si_inode);
+	status = ocfs2_write_block(osb, si->si_bh[0], si->si_inode);
 	if (status < 0)
 		mlog_errno(status);
 
 	return status;
 }
 
+/*
+ * Calculate how many bytes are needed by the slot map.  Returns
+ * an error if the slot map file is too small.
+ */
+static int ocfs2_slot_map_physical_size(struct ocfs2_super *osb,
+					struct inode *inode,
+					unsigned long long *bytes)
+{
+	unsigned long long bytes_needed;
+
+	bytes_needed = osb->max_slots * sizeof(__le16);
+	if (bytes_needed > i_size_read(inode)) {
+		mlog(ML_ERROR,
+		     "Slot map file is too small!  (size %llu, needed %llu)\n",
+		     i_size_read(inode), bytes_needed);
+		return -ENOSPC;
+	}
+
+	*bytes = bytes_needed;
+	return 0;
+}
+
 /* try to find global node in the slot info. Returns
  * OCFS2_INVALID_SLOT if nothing is found. */
 static s16 __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
@@ -188,13 +221,22 @@ int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num,
 
 static void __ocfs2_free_slot_info(struct ocfs2_slot_info *si)
 {
+	unsigned int i;
+
 	if (si == NULL)
 		return;
 
 	if (si->si_inode)
 		iput(si->si_inode);
-	if (si->si_bh)
-		brelse(si->si_bh);
+	if (si->si_bh) {
+		for (i = 0; i < si->si_blocks; i++) {
+			if (si->si_bh[i]) {
+				brelse(si->si_bh[i]);
+				si->si_bh[i] = NULL;
+			}
+		}
+		kfree(si->si_bh);
+	}
 
 	kfree(si);
 }
@@ -225,12 +267,65 @@ int ocfs2_clear_slot(struct ocfs2_super *osb, s16 slot_num)
 	return ocfs2_update_disk_slots(osb, osb->slot_info);
 }
 
+static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
+				  struct ocfs2_slot_info *si)
+{
+	int status = 0;
+	u64 blkno;
+	unsigned long long blocks, bytes;
+	unsigned int i;
+	struct buffer_head *bh;
+
+	status = ocfs2_slot_map_physical_size(osb, si->si_inode, &bytes);
+	if (status)
+		goto bail;
+
+	blocks = ocfs2_blocks_for_bytes(si->si_inode->i_sb, bytes);
+	BUG_ON(blocks > UINT_MAX);
+	si->si_blocks = blocks;
+	if (!si->si_blocks)
+		goto bail;
+
+	mlog(0, "Slot map needs %u buffers for %llu bytes\n",
+	     si->si_blocks, bytes);
+
+	si->si_bh = kzalloc(sizeof(struct buffer_head *) * si->si_blocks,
+			    GFP_KERNEL);
+	if (!si->si_bh) {
+		status = -ENOMEM;
+		mlog_errno(status);
+		goto bail;
+	}
+
+	for (i = 0; i < si->si_blocks; i++) {
+		status = ocfs2_extent_map_get_blocks(si->si_inode, i,
+						     &blkno, NULL, NULL);
+		if (status < 0) {
+			mlog_errno(status);
+			goto bail;
+		}
+
+		mlog(0, "Reading slot map block %u at %llu\n", i,
+		     (unsigned long long)blkno);
+
+		bh = NULL;  /* Acquire a fresh bh */
+		status = ocfs2_read_block(osb, blkno, &bh, 0, si->si_inode);
+		if (status < 0) {
+			mlog_errno(status);
+			goto bail;
+		}
+
+		si->si_bh[i] = bh;
+	}
+
+bail:
+	return status;
+}
+
 int ocfs2_init_slot_info(struct ocfs2_super *osb)
 {
 	int status, i;
-	u64 blkno;
 	struct inode *inode = NULL;
-	struct buffer_head *bh = NULL;
 	struct ocfs2_slot_info *si;
 
 	si = kzalloc(sizeof(struct ocfs2_slot_info), GFP_KERNEL);
@@ -254,20 +349,13 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb)
 		goto bail;
 	}
 
-	status = ocfs2_extent_map_get_blocks(inode, 0ULL, &blkno, NULL, NULL);
-	if (status < 0) {
-		mlog_errno(status);
-		goto bail;
-	}
-
-	status = ocfs2_read_block(osb, blkno, &bh, 0, inode);
+	si->si_inode = inode;
+	status = ocfs2_map_slot_buffers(osb, si);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
 	}
 
-	si->si_inode = inode;
-	si->si_bh = bh;
 	osb->slot_info = (struct ocfs2_slot_info *)si;
 bail:
 	if (status < 0 && si)
-- 
1.5.3.8


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 5/7] ocfs2: De-magic the in-memory slot map.
  2008-03-05 22:51 [PATCH 0/7] ocfs2: Extended slot map Joel Becker
                   ` (3 preceding siblings ...)
  2008-03-05 22:52 ` [PATCH 4/7] ocfs2: slot_map I/O based on max_slots Joel Becker
@ 2008-03-05 22:52 ` Joel Becker
  2008-03-05 22:52 ` [PATCH 6/7] ocfs2: Define the contents of the slot_map file Joel Becker
  2008-03-05 22:52 ` [PATCH 7/7] ocfs2: New slot map format Joel Becker
  6 siblings, 0 replies; 8+ messages in thread
From: Joel Becker @ 2008-03-05 22:52 UTC (permalink / raw)
  To: ocfs2-devel; +Cc: linux-kernel, linux-fsdevel

The in-memory slot map uses the same magic as the on-disk one: a special
value (OCFS2_INVALID_SLOT) marks a slot as invalid.  Detecting that value
relies on the size of the types that hold slot and node numbers.

Write a new in-memory map that keeps validity as a separate field.  Outside
of the I/O functions, OCFS2_INVALID_SLOT now means what it is supposed to.
It also is no longer tied to the type size.

This also means that only the I/O functions refer to 16bit quantities.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/journal.c  |    2 +-
 fs/ocfs2/ocfs2.h    |    6 +-
 fs/ocfs2/slot_map.c |  130 ++++++++++++++++++++++++++++-----------------------
 fs/ocfs2/slot_map.h |    2 +-
 4 files changed, 77 insertions(+), 63 deletions(-)

diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index fe679f3..4cd982a 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -71,7 +71,7 @@ static int ocfs2_commit_thread(void *arg);
  */
 
 struct ocfs2_recovery_map {
-	int rm_used;
+	unsigned int rm_used;
 	unsigned int *rm_entries;
 };
 
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index c6ed8c3..95f783d 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -216,10 +216,10 @@ struct ocfs2_super
 	unsigned long s_mount_opt;
 	unsigned int s_atime_quantum;
 
-	u16 max_slots;
+	unsigned int max_slots;
 	s16 node_num;
-	s16 slot_num;
-	s16 preferred_slot;
+	int slot_num;
+	int preferred_slot;
 	int s_sectsize_bits;
 	int s_clustersize;
 	int s_clustersize_bits;
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index 5bddee1..65a61bf 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -42,21 +42,41 @@
 
 #include "buffer_head_io.h"
 
+
+struct ocfs2_slot {
+	int sl_valid;
+	unsigned int sl_node_num;
+};
+
 struct ocfs2_slot_info {
 	struct inode *si_inode;
 	unsigned int si_blocks;
 	struct buffer_head **si_bh;
 	unsigned int si_num_slots;
-	unsigned int si_size;
-	s16 si_global_node_nums[OCFS2_MAX_SLOTS];
+	struct ocfs2_slot *si_slots;
 };
 
 
-static s16 __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
-				    s16 global);
-static void __ocfs2_fill_slot(struct ocfs2_slot_info *si,
-			      s16 slot_num,
-			      s16 node_num);
+static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
+				    unsigned int node_num);
+
+static void ocfs2_invalidate_slot(struct ocfs2_slot_info *si,
+				  int slot_num)
+{
+	BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));
+	si->si_slots[slot_num].sl_valid = 0;
+}
+
+static void ocfs2_set_slot(struct ocfs2_slot_info *si,
+			   int slot_num, unsigned int node_num)
+{
+	BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));
+	BUG_ON((node_num == O2NM_INVALID_NODE_NUM) ||
+	       (node_num >= O2NM_MAX_NODES));
+
+	si->si_slots[slot_num].sl_valid = 1;
+	si->si_slots[slot_num].sl_node_num = node_num;
+}
 
 /*
  * Post the slot information on disk into our slot_info struct.
@@ -71,8 +91,12 @@ static void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
 	 * should've made sure we have the most recent copy. */
 	disk_info = (__le16 *) si->si_bh[0]->b_data;
 
-	for (i = 0; i < si->si_size; i++)
-		si->si_global_node_nums[i] = le16_to_cpu(disk_info[i]);
+	for (i = 0; i < si->si_num_slots; i++) {
+		if (le16_to_cpu(disk_info[i]) == (u16)OCFS2_INVALID_SLOT)
+			ocfs2_invalidate_slot(si, i);
+		else
+			ocfs2_set_slot(si, i, le16_to_cpu(disk_info[i]));
+	}
 }
 
 int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
@@ -114,8 +138,13 @@ static int ocfs2_update_disk_slots(struct ocfs2_super *osb,
 	__le16 *disk_info = (__le16 *) si->si_bh[0]->b_data;
 
 	spin_lock(&osb->osb_lock);
-	for (i = 0; i < si->si_size; i++)
-		disk_info[i] = cpu_to_le16(si->si_global_node_nums[i]);
+	for (i = 0; i < si->si_num_slots; i++) {
+		if (si->si_slots[i].sl_valid)
+			disk_info[i] =
+				cpu_to_le16(si->si_slots[i].sl_node_num);
+		else
+			disk_info[i] = cpu_to_le16(OCFS2_INVALID_SLOT);
+	}
 	spin_unlock(&osb->osb_lock);
 
 	status = ocfs2_write_block(osb, si->si_bh[0], si->si_inode);
@@ -147,39 +176,39 @@ static int ocfs2_slot_map_physical_size(struct ocfs2_super *osb,
 	return 0;
 }
 
-/* try to find global node in the slot info. Returns
- * OCFS2_INVALID_SLOT if nothing is found. */
-static s16 __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
-				    s16 global)
+/* try to find global node in the slot info. Returns -ENOENT
+ * if nothing is found. */
+static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
+				    unsigned int node_num)
 {
-	int i;
-	s16 ret = OCFS2_INVALID_SLOT;
+	int i, ret = -ENOENT;
 
 	for(i = 0; i < si->si_num_slots; i++) {
-		if (global == si->si_global_node_nums[i]) {
-			ret = (s16) i;
+		if (si->si_slots[i].sl_valid &&
+		    (node_num == si->si_slots[i].sl_node_num)) {
+			ret = i;
 			break;
 		}
 	}
+
 	return ret;
 }
 
-static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
-				   s16 preferred)
+static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
+				   int preferred)
 {
-	int i;
-	s16 ret = OCFS2_INVALID_SLOT;
+	int i, ret = -ENOSPC;
 
-	if (preferred >= 0 && preferred < si->si_num_slots) {
-		if (OCFS2_INVALID_SLOT == si->si_global_node_nums[preferred]) {
+	if ((preferred >= 0) && (preferred < si->si_num_slots)) {
+		if (!si->si_slots[preferred].sl_valid) {
 			ret = preferred;
 			goto out;
 		}
 	}
 
 	for(i = 0; i < si->si_num_slots; i++) {
-		if (OCFS2_INVALID_SLOT == si->si_global_node_nums[i]) {
-			ret = (s16) i;
+		if (!si->si_slots[i].sl_valid) {
+			ret = i;
 			break;
 		}
 	}
@@ -189,16 +218,13 @@ out:
 
 int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num)
 {
-	s16 slot;
+	int slot;
 	struct ocfs2_slot_info *si = osb->slot_info;
 
 	spin_lock(&osb->osb_lock);
 	slot = __ocfs2_node_num_to_slot(si, node_num);
 	spin_unlock(&osb->osb_lock);
 
-	if (slot == OCFS2_INVALID_SLOT)
-		return -ENOENT;
-
 	return slot;
 }
 
@@ -212,10 +238,10 @@ int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num,
 	BUG_ON(slot_num < 0);
 	BUG_ON(slot_num > osb->max_slots);
 
-	if (si->si_global_node_nums[slot_num] == OCFS2_INVALID_SLOT)
+	if (!si->si_slots[slot_num].sl_valid)
 		return -ENOENT;
 
-	*node_num = si->si_global_node_nums[slot_num];
+	*node_num = si->si_slots[slot_num].sl_node_num;
 	return 0;
 }
 
@@ -241,19 +267,7 @@ static void __ocfs2_free_slot_info(struct ocfs2_slot_info *si)
 	kfree(si);
 }
 
-static void __ocfs2_fill_slot(struct ocfs2_slot_info *si,
-			      s16 slot_num,
-			      s16 node_num)
-{
-	BUG_ON(slot_num == OCFS2_INVALID_SLOT);
-	BUG_ON(slot_num >= si->si_num_slots);
-	BUG_ON((node_num != O2NM_INVALID_NODE_NUM) &&
-	       (node_num >= O2NM_MAX_NODES));
-
-	si->si_global_node_nums[slot_num] = node_num;
-}
-
-int ocfs2_clear_slot(struct ocfs2_super *osb, s16 slot_num)
+int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num)
 {
 	struct ocfs2_slot_info *si = osb->slot_info;
 
@@ -261,7 +275,7 @@ int ocfs2_clear_slot(struct ocfs2_super *osb, s16 slot_num)
 		return 0;
 
 	spin_lock(&osb->osb_lock);
-	__ocfs2_fill_slot(si, slot_num, OCFS2_INVALID_SLOT);
+	ocfs2_invalidate_slot(si, slot_num);
 	spin_unlock(&osb->osb_lock);
 
 	return ocfs2_update_disk_slots(osb, osb->slot_info);
@@ -324,11 +338,13 @@ bail:
 
 int ocfs2_init_slot_info(struct ocfs2_super *osb)
 {
-	int status, i;
+	int status;
 	struct inode *inode = NULL;
 	struct ocfs2_slot_info *si;
 
-	si = kzalloc(sizeof(struct ocfs2_slot_info), GFP_KERNEL);
+	si = kzalloc(sizeof(struct ocfs2_slot_info) +
+		     (sizeof(struct ocfs2_slot) * osb->max_slots),
+		     GFP_KERNEL);
 	if (!si) {
 		status = -ENOMEM;
 		mlog_errno(status);
@@ -336,10 +352,8 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb)
 	}
 
 	si->si_num_slots = osb->max_slots;
-	si->si_size = OCFS2_MAX_SLOTS;
-
-	for(i = 0; i < si->si_num_slots; i++)
-		si->si_global_node_nums[i] = OCFS2_INVALID_SLOT;
+	si->si_slots = (struct ocfs2_slot *)((char *)si +
+					     sizeof(struct ocfs2_slot_info));
 
 	inode = ocfs2_get_system_file_inode(osb, SLOT_MAP_SYSTEM_INODE,
 					    OCFS2_INVALID_SLOT);
@@ -375,7 +389,7 @@ void ocfs2_free_slot_info(struct ocfs2_super *osb)
 int ocfs2_find_slot(struct ocfs2_super *osb)
 {
 	int status;
-	s16 slot;
+	int slot;
 	struct ocfs2_slot_info *si;
 
 	mlog_entry_void();
@@ -390,11 +404,11 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
 	 * own journal recovery? Possibly not, though we certainly
 	 * need to warn to the user */
 	slot = __ocfs2_node_num_to_slot(si, osb->node_num);
-	if (slot == OCFS2_INVALID_SLOT) {
+	if (slot < 0) {
 		/* if no slot yet, then just take 1st available
 		 * one. */
 		slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
-		if (slot == OCFS2_INVALID_SLOT) {
+		if (slot < 0) {
 			spin_unlock(&osb->osb_lock);
 			mlog(ML_ERROR, "no free slots available!\n");
 			status = -EINVAL;
@@ -404,7 +418,7 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
 		mlog(ML_NOTICE, "slot %d is already allocated to this node!\n",
 		     slot);
 
-	__ocfs2_fill_slot(si, slot, osb->node_num);
+	ocfs2_set_slot(si, slot, osb->node_num);
 	osb->slot_num = slot;
 	spin_unlock(&osb->osb_lock);
 
@@ -430,7 +444,7 @@ void ocfs2_put_slot(struct ocfs2_super *osb)
 	spin_lock(&osb->osb_lock);
 	ocfs2_update_slot_info(si);
 
-	__ocfs2_fill_slot(si, osb->slot_num, OCFS2_INVALID_SLOT);
+	ocfs2_invalidate_slot(si, osb->slot_num);
 	osb->slot_num = OCFS2_INVALID_SLOT;
 	spin_unlock(&osb->osb_lock);
 
diff --git a/fs/ocfs2/slot_map.h b/fs/ocfs2/slot_map.h
index 5118e89..601c95f 100644
--- a/fs/ocfs2/slot_map.h
+++ b/fs/ocfs2/slot_map.h
@@ -39,6 +39,6 @@ int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num);
 int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num,
 				  unsigned int *node_num);
 
-int ocfs2_clear_slot(struct ocfs2_super *osb, s16 slot_num);
+int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num);
 
 #endif
-- 
1.5.3.8


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 6/7] ocfs2: Define the contents of the slot_map file.
  2008-03-05 22:51 [PATCH 0/7] ocfs2: Extended slot map Joel Becker
                   ` (4 preceding siblings ...)
  2008-03-05 22:52 ` [PATCH 5/7] ocfs2: De-magic the in-memory slot map Joel Becker
@ 2008-03-05 22:52 ` Joel Becker
  2008-03-05 22:52 ` [PATCH 7/7] ocfs2: New slot map format Joel Becker
  6 siblings, 0 replies; 8+ messages in thread
From: Joel Becker @ 2008-03-05 22:52 UTC (permalink / raw)
  To: ocfs2-devel; +Cc: linux-kernel, linux-fsdevel

The slot map file is merely an array of __le16.  Wrap it in a structure for
cleaner reference.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/ocfs2_fs.h |   12 ++++++++++++
 fs/ocfs2/slot_map.c |   15 ++++++++-------
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 3633edd..3299116 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -475,6 +475,18 @@ struct ocfs2_extent_block
 };
 
 /*
+ * On disk slot map for OCFS2.  This defines the contents of the "slot_map"
+ * system file.
+ */
+struct ocfs2_slot_map {
+/*00*/	__le16 sm_slots[0];
+/*
+ * Actual on-disk size is one block.  OCFS2_MAX_SLOTS is 255,
+ * 255 * sizeof(__le16) == 510B, within the 512B minimum blocksize.
+ */
+};
+
+/*
  * On disk superblock for OCFS2
  * Note that it is contained inside an ocfs2_dinode, so all offsets
  * are relative to the start of ocfs2_dinode.id2.
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index 65a61bf..e7e7a74 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -85,17 +85,17 @@ static void ocfs2_set_slot(struct ocfs2_slot_info *si,
 static void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
 {
 	int i;
-	__le16 *disk_info;
+	struct ocfs2_slot_map *sm;
 
 	/* we don't read the slot block here as ocfs2_super_lock
 	 * should've made sure we have the most recent copy. */
-	disk_info = (__le16 *) si->si_bh[0]->b_data;
+	sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
 
 	for (i = 0; i < si->si_num_slots; i++) {
-		if (le16_to_cpu(disk_info[i]) == (u16)OCFS2_INVALID_SLOT)
+		if (le16_to_cpu(sm->sm_slots[i]) == (u16)OCFS2_INVALID_SLOT)
 			ocfs2_invalidate_slot(si, i);
 		else
-			ocfs2_set_slot(si, i, le16_to_cpu(disk_info[i]));
+			ocfs2_set_slot(si, i, le16_to_cpu(sm->sm_slots[i]));
 	}
 }
 
@@ -135,15 +135,16 @@ static int ocfs2_update_disk_slots(struct ocfs2_super *osb,
 				   struct ocfs2_slot_info *si)
 {
 	int status, i;
-	__le16 *disk_info = (__le16 *) si->si_bh[0]->b_data;
+	struct ocfs2_slot_map *sm;
 
 	spin_lock(&osb->osb_lock);
+	sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
 	for (i = 0; i < si->si_num_slots; i++) {
 		if (si->si_slots[i].sl_valid)
-			disk_info[i] =
+			sm->sm_slots[i] =
 				cpu_to_le16(si->si_slots[i].sl_node_num);
 		else
-			disk_info[i] = cpu_to_le16(OCFS2_INVALID_SLOT);
+			sm->sm_slots[i] = cpu_to_le16(OCFS2_INVALID_SLOT);
 	}
 	spin_unlock(&osb->osb_lock);
 
-- 
1.5.3.8


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 7/7] ocfs2: New slot map format
  2008-03-05 22:51 [PATCH 0/7] ocfs2: Extended slot map Joel Becker
                   ` (5 preceding siblings ...)
  2008-03-05 22:52 ` [PATCH 6/7] ocfs2: Define the contents of the slot_map file Joel Becker
@ 2008-03-05 22:52 ` Joel Becker
  6 siblings, 0 replies; 8+ messages in thread
From: Joel Becker @ 2008-03-05 22:52 UTC (permalink / raw)
  To: ocfs2-devel; +Cc: linux-kernel, linux-fsdevel

The old slot map had a few limitations:

- It was limited to one block, so the maximum slot count was 255.
- Each slot was signed 16bits, limiting node numbers to INT16_MAX.
- An empty slot was marked by the magic 0xFFFF (-1).

The new slot map format provides 32bit node numbers (UINT32_MAX), a
separate space to mark a slot in use, and extra room to grow.  The slot
map is now bounded by i_size, not a block.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/ocfs2.h    |    7 +++
 fs/ocfs2/ocfs2_fs.h |   31 +++++++++++++-
 fs/ocfs2/slot_map.c |  110 +++++++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 133 insertions(+), 15 deletions(-)

diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 95f783d..f78e9ed 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -374,6 +374,13 @@ static inline int ocfs2_mount_local(struct ocfs2_super *osb)
 	return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT);
 }
 
+static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
+{
+	return (osb->s_feature_incompat &
+		OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP);
+}
+
+
 #define OCFS2_IS_VALID_DINODE(ptr)					\
 	(!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE))
 
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 3299116..c495023 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -88,7 +88,8 @@
 #define OCFS2_FEATURE_COMPAT_SUPP	OCFS2_FEATURE_COMPAT_BACKUP_SB
 #define OCFS2_FEATURE_INCOMPAT_SUPP	(OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \
 					 | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \
-					 | OCFS2_FEATURE_INCOMPAT_INLINE_DATA)
+					 | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \
+					 | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP)
 #define OCFS2_FEATURE_RO_COMPAT_SUPP	OCFS2_FEATURE_RO_COMPAT_UNWRITTEN
 
 /*
@@ -125,6 +126,10 @@
 /* Support for data packed into inode blocks */
 #define OCFS2_FEATURE_INCOMPAT_INLINE_DATA	0x0040
 
+/* Support for the extended slot map */
+#define OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP 0x100
+
+
 /*
  * backup superblock flag is used to indicate that this volume
  * has backup superblocks.
@@ -476,7 +481,8 @@ struct ocfs2_extent_block
 
 /*
  * On disk slot map for OCFS2.  This defines the contents of the "slot_map"
- * system file.
+ * system file.  A slot is valid if it contains a node number >= 0.  The
+ * value -1 (0xFFFF) is OCFS2_INVALID_SLOT.  This marks a slot empty.
  */
 struct ocfs2_slot_map {
 /*00*/	__le16 sm_slots[0];
@@ -486,6 +492,27 @@ struct ocfs2_slot_map {
  */
 };
 
+struct ocfs2_extended_slot {
+/*00*/	__u8	es_valid;
+	__u8	es_reserved1[3];
+	__le32	es_node_num;
+/*08*/
+};
+
+/*
+ * The extended slot map, used when OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP
+ * is set.  It separates out the valid marker from the node number, and
+ * has room to grow.  Unlike the old slot map, this format is defined by
+ * i_size.
+ */
+struct ocfs2_slot_map_extended {
+/*00*/	struct ocfs2_extended_slot se_slots[0];
+/*
+ * Actual size is i_size of the slot_map system file.  It should
+ * match s_max_slots * sizeof(struct ocfs2_extended_slot)
+ */
+};
+
 /*
  * On disk superblock for OCFS2
  * Note that it is contained inside an ocfs2_dinode, so all offsets
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index e7e7a74..63fb1b2 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -49,6 +49,8 @@ struct ocfs2_slot {
 };
 
 struct ocfs2_slot_info {
+	int si_extended;
+	int si_slots_per_block;
 	struct inode *si_inode;
 	unsigned int si_blocks;
 	struct buffer_head **si_bh;
@@ -78,17 +80,37 @@ static void ocfs2_set_slot(struct ocfs2_slot_info *si,
 	si->si_slots[slot_num].sl_node_num = node_num;
 }
 
+/* This version is for the extended slot map */
+static void ocfs2_update_slot_info_extended(struct ocfs2_slot_info *si)
+{
+	int b, i, slotno;
+	struct ocfs2_slot_map_extended *se;
+
+	slotno = 0;
+	for (b = 0; b < si->si_blocks; b++) {
+		se = (struct ocfs2_slot_map_extended *)si->si_bh[b]->b_data;
+		for (i = 0;
+		     (i < si->si_slots_per_block) &&
+		     (slotno < si->si_num_slots);
+		     i++, slotno++) {
+			if (se->se_slots[i].es_valid)
+				ocfs2_set_slot(si, slotno,
+					       le32_to_cpu(se->se_slots[i].es_node_num));
+			else
+				ocfs2_invalidate_slot(si, slotno);
+		}
+	}
+}
+
 /*
  * Post the slot information on disk into our slot_info struct.
  * Must be protected by osb_lock.
  */
-static void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
+static void ocfs2_update_slot_info_old(struct ocfs2_slot_info *si)
 {
 	int i;
 	struct ocfs2_slot_map *sm;
 
-	/* we don't read the slot block here as ocfs2_super_lock
-	 * should've made sure we have the most recent copy. */
 	sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
 
 	for (i = 0; i < si->si_num_slots; i++) {
@@ -99,6 +121,18 @@ static void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
 	}
 }
 
+static void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
+{
+	/*
+	 * The slot data will have been refreshed when ocfs2_super_lock
+	 * was taken.
+	 */
+	if (si->si_extended)
+		ocfs2_update_slot_info_extended(si);
+	else
+		ocfs2_update_slot_info_old(si);
+}
+
 int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
 {
 	int ret;
@@ -131,13 +165,31 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
 
 /* post the our slot info stuff into it's destination bh and write it
  * out. */
-static int ocfs2_update_disk_slots(struct ocfs2_super *osb,
-				   struct ocfs2_slot_info *si)
+static void ocfs2_update_disk_slot_extended(struct ocfs2_slot_info *si,
+					    int slot_num,
+					    struct buffer_head **bh)
+{
+	int blkind = slot_num / si->si_slots_per_block;
+	int slotno = slot_num % si->si_slots_per_block;
+	struct ocfs2_slot_map_extended *se;
+
+	BUG_ON(blkind >= si->si_blocks);
+
+	se = (struct ocfs2_slot_map_extended *)si->si_bh[blkind]->b_data;
+	se->se_slots[slotno].es_valid = si->si_slots[slot_num].sl_valid;
+	if (si->si_slots[slot_num].sl_valid)
+		se->se_slots[slotno].es_node_num =
+			cpu_to_le32(si->si_slots[slot_num].sl_node_num);
+	*bh = si->si_bh[blkind];
+}
+
+static void ocfs2_update_disk_slot_old(struct ocfs2_slot_info *si,
+				       int slot_num,
+				       struct buffer_head **bh)
 {
-	int status, i;
+	int i;
 	struct ocfs2_slot_map *sm;
 
-	spin_lock(&osb->osb_lock);
 	sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
 	for (i = 0; i < si->si_num_slots; i++) {
 		if (si->si_slots[i].sl_valid)
@@ -146,9 +198,24 @@ static int ocfs2_update_disk_slots(struct ocfs2_super *osb,
 		else
 			sm->sm_slots[i] = cpu_to_le16(OCFS2_INVALID_SLOT);
 	}
+	*bh = si->si_bh[0];
+}
+
+static int ocfs2_update_disk_slot(struct ocfs2_super *osb,
+				  struct ocfs2_slot_info *si,
+				  int slot_num)
+{
+	int status;
+	struct buffer_head *bh;
+
+	spin_lock(&osb->osb_lock);
+	if (si->si_extended)
+		ocfs2_update_disk_slot_extended(si, slot_num, &bh);
+	else
+		ocfs2_update_disk_slot_old(si, slot_num, &bh);
 	spin_unlock(&osb->osb_lock);
 
-	status = ocfs2_write_block(osb, si->si_bh[0], si->si_inode);
+	status = ocfs2_write_block(osb, bh, si->si_inode);
 	if (status < 0)
 		mlog_errno(status);
 
@@ -165,7 +232,12 @@ static int ocfs2_slot_map_physical_size(struct ocfs2_super *osb,
 {
 	unsigned long long bytes_needed;
 
-	bytes_needed = osb->max_slots * sizeof(__le16);
+	if (ocfs2_uses_extended_slot_map(osb)) {
+		bytes_needed = osb->max_slots *
+			sizeof(struct ocfs2_extended_slot);
+	} else {
+		bytes_needed = osb->max_slots * sizeof(__le16);
+	}
 	if (bytes_needed > i_size_read(inode)) {
 		mlog(ML_ERROR,
 		     "Slot map file is too small!  (size %llu, needed %llu)\n",
@@ -279,7 +351,7 @@ int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num)
 	ocfs2_invalidate_slot(si, slot_num);
 	spin_unlock(&osb->osb_lock);
 
-	return ocfs2_update_disk_slots(osb, osb->slot_info);
+	return ocfs2_update_disk_slot(osb, osb->slot_info, slot_num);
 }
 
 static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
@@ -301,6 +373,16 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
 	if (!si->si_blocks)
 		goto bail;
 
+	if (si->si_extended)
+		si->si_slots_per_block =
+			(osb->sb->s_blocksize /
+			 sizeof(struct ocfs2_extended_slot));
+	else
+		si->si_slots_per_block = osb->sb->s_blocksize / sizeof(__le16);
+
+	/* The size checks above should ensure this */
+	BUG_ON((osb->max_slots / si->si_slots_per_block) > blocks);
+
 	mlog(0, "Slot map needs %u buffers for %llu bytes\n",
 	     si->si_blocks, bytes);
 
@@ -352,6 +434,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb)
 		goto bail;
 	}
 
+	si->si_extended = ocfs2_uses_extended_slot_map(osb);
 	si->si_num_slots = osb->max_slots;
 	si->si_slots = (struct ocfs2_slot *)((char *)si +
 					     sizeof(struct ocfs2_slot_info));
@@ -425,7 +508,7 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
 
 	mlog(0, "taking node slot %d\n", osb->slot_num);
 
-	status = ocfs2_update_disk_slots(osb, si);
+	status = ocfs2_update_disk_slot(osb, si, osb->slot_num);
 	if (status < 0)
 		mlog_errno(status);
 
@@ -436,7 +519,7 @@ bail:
 
 void ocfs2_put_slot(struct ocfs2_super *osb)
 {
-	int status;
+	int status, slot_num;
 	struct ocfs2_slot_info *si = osb->slot_info;
 
 	if (!si)
@@ -445,11 +528,12 @@ void ocfs2_put_slot(struct ocfs2_super *osb)
 	spin_lock(&osb->osb_lock);
 	ocfs2_update_slot_info(si);
 
+	slot_num = osb->slot_num;
 	ocfs2_invalidate_slot(si, osb->slot_num);
 	osb->slot_num = OCFS2_INVALID_SLOT;
 	spin_unlock(&osb->osb_lock);
 
-	status = ocfs2_update_disk_slots(osb, si);
+	status = ocfs2_update_disk_slot(osb, si, slot_num);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
-- 
1.5.3.8


^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2008-03-05 22:59 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-03-05 22:51 [PATCH 0/7] ocfs2: Extended slot map Joel Becker
2008-03-05 22:51 ` [PATCH 1/7] ocfs2: Move slot map access into slot_map.c Joel Becker
2008-03-05 22:51 ` [PATCH 2/7] ocfs2: Make ocfs2_slot_info private Joel Becker
2008-03-05 22:52 ` [PATCH 3/7] ocfs2: Change the recovery map to an array of node numbers Joel Becker
2008-03-05 22:52 ` [PATCH 4/7] ocfs2: slot_map I/O based on max_slots Joel Becker
2008-03-05 22:52 ` [PATCH 5/7] ocfs2: De-magic the in-memory slot map Joel Becker
2008-03-05 22:52 ` [PATCH 6/7] ocfs2: Define the contents of the slot_map file Joel Becker
2008-03-05 22:52 ` [PATCH 7/7] ocfs2: New slot map format Joel Becker

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).