LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [f2fs-dev][RFC PATCH 06/10] f2fs: add core functions for rb-tree extent cache
@ 2015-01-12  7:14 Chao Yu
  2015-01-20 15:06 ` Changman Lee
  2015-01-23  1:48 ` Jaegeuk Kim
  0 siblings, 2 replies; 9+ messages in thread
From: Chao Yu @ 2015-01-12  7:14 UTC (permalink / raw)
  To: Jaegeuk Kim, Changman Lee; +Cc: linux-f2fs-devel, linux-kernel

This patch adds core functions including slab cache init function and
init/lookup/update/shrink/destroy function for rb-tree based extent cache.

Thank Jaegeuk Kim and Changman Lee as they gave much suggestion about detail
design and implementation of extent cache.

Todo:
 * add a cached_ei into struct extent_tree for a quick recent cache.
 * register rb-based extent cache shrink with mm shrink interface.
 * disable dir inode's extent cache.

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Changman Lee <cm224.lee@samsung.com>
---
 fs/f2fs/data.c | 458 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/f2fs/node.c |   9 +-
 2 files changed, 466 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 4f5b871e..bf8c5eb 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -25,6 +25,9 @@
 #include "trace.h"
 #include <trace/events/f2fs.h>
 
+struct kmem_cache *extent_tree_slab;
+struct kmem_cache *extent_node_slab;
+
 static void f2fs_read_end_io(struct bio *bio, int err)
 {
 	struct bio_vec *bvec;
@@ -373,6 +376,430 @@ end_update:
 	return need_update;
 }
 
+static struct extent_node *__lookup_extent_tree(struct extent_tree *et,
+							unsigned int fofs)
+{
+	struct rb_node *node = et->root.rb_node;
+	struct extent_node *en;
+
+	while (node) {
+		en = rb_entry(node, struct extent_node, rb_node);
+		if (fofs < en->ei.fofs)
+			node = node->rb_left;
+		else if (fofs >= en->ei.fofs + en->ei.len)
+			node = node->rb_right;
+		else
+			return en;
+	}
+	return NULL;
+}
+
+static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
+						u32 blk, unsigned int len)
+{
+	ei->fofs = fofs;
+	ei->blk = blk;
+	ei->len = len;
+}
+
+static inline bool __is_extent_mergeable(struct extent_info *back,
+						struct extent_info *front)
+{
+	return (back->fofs + back->len == front->fofs &&
+			back->blk + back->len == front->blk);
+}
+
+static bool __is_back_mergeable(struct extent_info *cur,
+						struct extent_info *back)
+{
+	return __is_extent_mergeable(back, cur);
+}
+
+static bool __is_front_mergeable(struct extent_info *cur,
+						struct extent_info *front)
+{
+	return __is_extent_mergeable(cur, front);
+}
+
+static struct extent_node *__try_back_merge(struct extent_tree *et,
+						struct extent_node *en)
+{
+	struct extent_node *prev;
+	struct rb_node *node;
+
+	node = rb_prev(&en->rb_node);
+	if (!node)
+		return NULL;
+
+	prev = rb_entry(node, struct extent_node, rb_node);
+	if (__is_back_mergeable(&en->ei, &prev->ei)) {
+		en->ei.fofs = prev->ei.fofs;
+		en->ei.blk = prev->ei.blk;
+		en->ei.len += prev->ei.len;
+		rb_erase(&prev->rb_node, &et->root);
+		et->count--;
+		return prev;
+	}
+	return NULL;
+}
+
+static struct extent_node *__try_front_merge(struct extent_tree *et,
+						struct extent_node *en)
+{
+	struct extent_node *next;
+	struct rb_node *node;
+
+	node = rb_next(&en->rb_node);
+	if (!node)
+		return NULL;
+
+	next = rb_entry(node, struct extent_node, rb_node);
+	if (__is_front_mergeable(&en->ei, &next->ei)) {
+		en->ei.len += next->ei.len;
+		rb_erase(&next->rb_node, &et->root);
+		et->count--;
+		return next;
+	}
+	return NULL;
+}
+
+static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
+				struct extent_tree *et, struct extent_info *ei,
+				struct extent_node **den)
+{
+	struct rb_node **p = &et->root.rb_node;
+	struct rb_node *parent = NULL;
+	struct extent_node *en;
+
+	while (*p) {
+		parent = *p;
+		en = rb_entry(parent, struct extent_node, rb_node);
+
+		if (ei->fofs < en->ei.fofs) {
+			if (__is_front_mergeable(ei, &en->ei)) {
+				f2fs_bug_on(sbi, !den);
+				en->ei.fofs = ei->fofs;
+				en->ei.blk = ei->blk;
+				en->ei.len += ei->len;
+				*den = __try_back_merge(et, en);
+				return en;
+			}
+			p = &(*p)->rb_left;
+		} else if (ei->fofs >= en->ei.fofs + en->ei.len) {
+			if (__is_back_mergeable(ei, &en->ei)) {
+				f2fs_bug_on(sbi, !den);
+				en->ei.len += ei->len;
+				*den = __try_front_merge(et, en);
+				return en;
+			}
+			p = &(*p)->rb_right;
+		} else
+			f2fs_bug_on(F2FS_I_SB(inode), 1);
+	}
+
+	en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
+	en->ei = *ei;
+	INIT_LIST_HEAD(&en->list);
+
+	rb_link_node(&en->rb_node, parent, p);
+	rb_insert_color(&en->rb_node, &et->root);
+	atomic_inc(&sbi->total_ext_node);
+	et->count++;
+
+	return en;
+}
+
+static struct extent_node *__remove_extent_tree(struct extent_tree *et,
+							unsigned int fofs)
+{
+	struct rb_node *p = et->root.rb_node;
+	struct extent_node *en;
+
+	while (p) {
+		en = rb_entry(p, struct extent_node, rb_node);
+
+		if (fofs < en->ei.fofs)
+			p = p->rb_left;
+		else if (fofs >= en->ei.fofs + en->ei.len)
+			p = p->rb_right;
+		else {
+			rb_erase(&en->rb_node, &et->root);
+			et->count--;
+			return en;
+		}
+	}
+	return NULL;
+}
+
+static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
+					struct extent_tree *et, bool free_all)
+{
+	struct rb_node *node, *next;
+	struct extent_node *en;
+	unsigned int count = et->count;
+
+	node = rb_first(&et->root);
+	while (node) {
+		next = rb_next(node);
+		en = rb_entry(node, struct extent_node, rb_node);
+
+		if (free_all) {
+			spin_lock(&sbi->extent_lock);
+			if (!list_empty(&en->list))
+				list_del_init(&en->list);
+			spin_unlock(&sbi->extent_lock);
+		}
+
+		if (free_all || list_empty(&en->list)) {
+			rb_erase(node, &et->root);
+			kmem_cache_free(extent_node_slab, en);
+			atomic_dec(&sbi->total_ext_node);
+			et->count--;
+		}
+		node = next;
+	}
+
+	return count - et->count;
+}
+
+static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
+							struct extent_info *ei)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct extent_tree *et;
+	struct extent_node *en;
+
+	if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
+		return false;
+
+	down_read(&sbi->extent_tree_lock);
+	et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino);
+	if (!et) {
+		up_read(&sbi->extent_tree_lock);
+		return false;
+	}
+	atomic_inc(&et->refcount);
+	up_read(&sbi->extent_tree_lock);
+
+	read_lock(&et->lock);
+	en = __lookup_extent_tree(et, pgofs);
+	if (en) {
+		*ei = en->ei;
+		spin_lock(&sbi->extent_lock);
+		if (!list_empty(&en->list))
+			list_move_tail(&en->list, &sbi->extent_list);
+		spin_unlock(&sbi->extent_lock);
+		stat_inc_read_hit(sbi->sb);
+	}
+	stat_inc_total_hit(sbi->sb);
+	read_unlock(&et->lock);
+
+	atomic_dec(&et->refcount);
+	return en ? true : false;
+}
+
+static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs,
+							block_t blkaddr)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	nid_t ino = inode->i_ino;
+	struct extent_tree *et;
+	struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
+	struct extent_node *den = NULL;
+	struct extent_info *pei;
+	struct extent_info ei;
+	unsigned int endofs;
+
+	if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
+		return;
+
+retry:
+	down_write(&sbi->extent_tree_lock);
+	et = radix_tree_lookup(&sbi->extent_tree_root, ino);
+	if (!et) {
+		et = kmem_cache_alloc(extent_tree_slab, GFP_ATOMIC);
+		if (!et) {
+			up_write(&sbi->extent_tree_lock);
+			goto retry;
+		}
+		if (radix_tree_insert(&sbi->extent_tree_root, ino, et)) {
+			up_write(&sbi->extent_tree_lock);
+			kmem_cache_free(extent_tree_slab, et);
+			goto retry;
+		}
+		memset(et, 0, sizeof(struct extent_tree));
+		et->ino = ino;
+		et->root = RB_ROOT;
+		rwlock_init(&et->lock);
+		atomic_set(&et->refcount, 0);
+		et->count = 0;
+		sbi->total_ext_tree++;
+	}
+	atomic_inc(&et->refcount);
+	up_write(&sbi->extent_tree_lock);
+
+	write_lock(&et->lock);
+
+	/* 1. lookup and remove exist extent info in cache */
+	en = __remove_extent_tree(et, fofs);
+	if (!en)
+		goto update_extent;
+
+	pei = &en->ei;
+	/* 2. if extent can be split more, split and insert the left part */
+	if (pei->len > 1) {
+		/*  insert left part of split extent into cache */
+		if (pei->fofs < fofs) {
+			set_extent_info(&ei, pei->fofs, pei->blk,
+							fofs - pei->fofs);
+			en1 = __insert_extent_tree(sbi, et, &ei, NULL);
+		}
+
+		/* insert right part of split extent into cache */
+		endofs = pei->fofs + pei->len - 1;
+		if (endofs > fofs) {
+			set_extent_info(&ei, fofs + 1,
+				fofs - pei->fofs + pei->blk, endofs - fofs);
+			en2 = __insert_extent_tree(sbi, et, &ei, NULL);
+		}
+	}
+
+update_extent:
+	/* 3. update extent in extent cache */
+	if (blkaddr) {
+		set_extent_info(&ei, fofs, blkaddr, 1);
+		en3 = __insert_extent_tree(sbi, et, &ei, &den);
+	}
+
+	/* 4. update in global extent list */
+	spin_lock(&sbi->extent_lock);
+	if (en && !list_empty(&en->list))
+		list_del_init(&en->list);
+	/*
+	 * en1 and en2 split from en, they will become more and more smaller
+	 * fragments after splitting several times. So if the length is smaller
+	 * than F2FS_MIN_EXTENT_LEN, we will not add them into extent_list,
+	 * but just waiting shrinker to free them for reclaiming when OOM.
+	 */
+	if (en1 && en1->ei.len >= F2FS_MIN_EXTENT_LEN)
+		list_add_tail(&en1->list, &sbi->extent_list);
+	if (en2 && en2->ei.len >= F2FS_MIN_EXTENT_LEN)
+		list_add_tail(&en2->list, &sbi->extent_list);
+	if (en3) {
+		if (list_empty(&en3->list))
+			list_add_tail(&en3->list, &sbi->extent_list);
+		else
+			list_move_tail(&en3->list, &sbi->extent_list);
+	}
+	if (den && !list_empty(&den->list))
+		list_del_init(&den->list);
+	spin_unlock(&sbi->extent_lock);
+
+	if (en) {
+		kmem_cache_free(extent_node_slab, en);
+		atomic_dec(&sbi->total_ext_node);
+	}
+	if (den) {
+		kmem_cache_free(extent_node_slab, den);
+		atomic_dec(&sbi->total_ext_node);
+	}
+
+	write_unlock(&et->lock);
+	atomic_dec(&et->refcount);
+}
+
+void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
+{
+	struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
+	struct extent_node *en, *tmp;
+	unsigned long ino = F2FS_ROOT_INO(sbi);
+	struct radix_tree_iter iter;
+	void **slot;
+	unsigned int found;
+	unsigned int node_cnt = 0, tree_cnt = 0;
+
+	if (available_free_memory(sbi, EXTENT_CACHE))
+		return;
+
+	spin_lock(&sbi->extent_lock);
+	list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
+		if (!nr_shrink--)
+			break;
+		list_del_init(&en->list);
+	}
+	spin_unlock(&sbi->extent_lock);
+
+	down_read(&sbi->extent_tree_lock);
+	while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
+				(void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
+		unsigned i;
+
+		ino = treevec[found - 1]->ino + 1;
+		for (i = 0; i < found; i++) {
+			struct extent_tree *et = treevec[i];
+
+			atomic_inc(&et->refcount);
+			write_lock(&et->lock);
+			node_cnt += __free_extent_tree(sbi, et, false);
+			write_unlock(&et->lock);
+			atomic_dec(&et->refcount);
+		}
+	}
+	up_read(&sbi->extent_tree_lock);
+
+	down_write(&sbi->extent_tree_lock);
+	radix_tree_for_each_slot(slot, &sbi->extent_tree_root, &iter,
+							F2FS_ROOT_INO(sbi)) {
+		struct extent_tree *et = (struct extent_tree *)*slot;
+
+		if (!atomic_read(&et->refcount) && !et->count) {
+			radix_tree_delete(&sbi->extent_tree_root, et->ino);
+			kmem_cache_free(extent_tree_slab, et);
+			sbi->total_ext_tree--;
+			tree_cnt++;
+		}
+	}
+	up_write(&sbi->extent_tree_lock);
+}
+
+void f2fs_destroy_extent_tree(struct inode *inode)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct extent_tree *et;
+	unsigned int node_cnt = 0;
+
+	down_read(&sbi->extent_tree_lock);
+	et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino);
+	if (!et) {
+		up_read(&sbi->extent_tree_lock);
+		goto out;
+	}
+	atomic_inc(&et->refcount);
+	up_read(&sbi->extent_tree_lock);
+
+	/* free all extent info belong to this extent tree */
+	write_lock(&et->lock);
+	node_cnt = __free_extent_tree(sbi, et, true);
+	write_unlock(&et->lock);
+
+	atomic_dec(&et->refcount);
+
+	/* try to find and delete extent tree entry in radix tree */
+	down_write(&sbi->extent_tree_lock);
+	et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino);
+	if (!et) {
+		up_write(&sbi->extent_tree_lock);
+		goto out;
+	}
+	f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count);
+	radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
+	kmem_cache_free(extent_tree_slab, et);
+	sbi->total_ext_tree--;
+	up_write(&sbi->extent_tree_lock);
+out:
+	return;
+}
+
 static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
 							struct extent_info *ei)
 {
@@ -1198,6 +1625,37 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
 	return generic_block_bmap(mapping, block, get_data_block);
 }
 
+void init_extent_cache_info(struct f2fs_sb_info *sbi)
+{
+	INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOFS);
+	init_rwsem(&sbi->extent_tree_lock);
+	INIT_LIST_HEAD(&sbi->extent_list);
+	spin_lock_init(&sbi->extent_lock);
+	sbi->total_ext_tree = 0;
+	atomic_set(&sbi->total_ext_node, 0);
+}
+
+int __init create_extent_cache(void)
+{
+	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
+			sizeof(struct extent_tree));
+	if (!extent_tree_slab)
+		return -ENOMEM;
+	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
+			sizeof(struct extent_node));
+	if (!extent_node_slab) {
+		kmem_cache_destroy(extent_tree_slab);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+void destroy_extent_cache(void)
+{
+	kmem_cache_destroy(extent_node_slab);
+	kmem_cache_destroy(extent_tree_slab);
+}
+
 const struct address_space_operations f2fs_dblock_aops = {
 	.readpage	= f2fs_read_data_page,
 	.readpages	= f2fs_read_data_pages,
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index d7c1436..387017f 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -41,7 +41,9 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
 	/* only uses low memory */
 	avail_ram = val.totalram - val.totalhigh;
 
-	/* give 25%, 25%, 50%, 50% memory for each components respectively */
+	/*
+	 * give 25%, 25%, 50%, 50%, 50% memory for each components respectively
+	 */
 	if (type == FREE_NIDS) {
 		mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >>
 							PAGE_CACHE_SHIFT;
@@ -62,6 +64,11 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
 			mem_size += (sbi->im[i].ino_num *
 				sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT;
 		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
+	} else if (type == EXTENT_CACHE) {
+		mem_size = (sbi->total_ext_tree * sizeof(struct extent_tree) +
+				atomic_read(&sbi->total_ext_node) *
+				sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT;
+		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
 	} else {
 		if (sbi->sb->s_bdi->dirty_exceeded)
 			return false;
-- 
2.2.1



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [f2fs-dev][RFC PATCH 06/10] f2fs: add core functions for rb-tree extent cache
  2015-01-12  7:14 [f2fs-dev][RFC PATCH 06/10] f2fs: add core functions for rb-tree extent cache Chao Yu
@ 2015-01-20 15:06 ` Changman Lee
  2015-01-21  8:41   ` Chao Yu
  2015-01-23  1:48 ` Jaegeuk Kim
  1 sibling, 1 reply; 9+ messages in thread
From: Changman Lee @ 2015-01-20 15:06 UTC (permalink / raw)
  To: Chao Yu; +Cc: Jaegeuk Kim, Changman Lee, linux-f2fs-devel, linux-kernel

Hi Chao,

Great works. :)

2015-01-12 16:14 GMT+09:00 Chao Yu <chao2.yu@samsung.com>:
> This patch adds core functions including slab cache init function and
> init/lookup/update/shrink/destroy function for rb-tree based extent cache.
>
> Thank Jaegeuk Kim and Changman Lee as they gave much suggestion about detail
> design and implementation of extent cache.
>
> Todo:
>  * add a cached_ei into struct extent_tree for a quick recent cache.
>  * register rb-based extent cache shrink with mm shrink interface.
>  * disable dir inode's extent cache.
>
> Signed-off-by: Chao Yu <chao2.yu@samsung.com>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> Signed-off-by: Changman Lee <cm224.lee@samsung.com>
> ---
>  fs/f2fs/data.c | 458 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/f2fs/node.c |   9 +-
>  2 files changed, 466 insertions(+), 1 deletion(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 4f5b871e..bf8c5eb 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -25,6 +25,9 @@
>  #include "trace.h"
>  #include <trace/events/f2fs.h>
>

~ snip ~

> +
> +static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs,
> +                                                       block_t blkaddr)
> +{
> +       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> +       nid_t ino = inode->i_ino;
> +       struct extent_tree *et;
> +       struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
> +       struct extent_node *den = NULL;
> +       struct extent_info *pei;
> +       struct extent_info ei;
> +       unsigned int endofs;
> +
> +       if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
> +               return;
> +
> +retry:
> +       down_write(&sbi->extent_tree_lock);
> +       et = radix_tree_lookup(&sbi->extent_tree_root, ino);
> +       if (!et) {

We've already made some useful functions.
How about using f2fs_kmem_cache_alloc and f2fs_radix_tree_insert ?

> +               et = kmem_cache_alloc(extent_tree_slab, GFP_ATOMIC);
> +               if (!et) {
> +                       up_write(&sbi->extent_tree_lock);
> +                       goto retry;
> +               }
> +               if (radix_tree_insert(&sbi->extent_tree_root, ino, et)) {
> +                       up_write(&sbi->extent_tree_lock);
> +                       kmem_cache_free(extent_tree_slab, et);
> +                       goto retry;
> +               }
> +               memset(et, 0, sizeof(struct extent_tree));
> +               et->ino = ino;
> +               et->root = RB_ROOT;
> +               rwlock_init(&et->lock);
> +               atomic_set(&et->refcount, 0);
> +               et->count = 0;
> +               sbi->total_ext_tree++;
> +       }
> +       atomic_inc(&et->refcount);
> +       up_write(&sbi->extent_tree_lock);
> +

~ snip ~

> +
> +       write_unlock(&et->lock);
> +       atomic_dec(&et->refcount);
> +}
> +
> +void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
> +{
> +       struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
> +       struct extent_node *en, *tmp;
> +       unsigned long ino = F2FS_ROOT_INO(sbi);
> +       struct radix_tree_iter iter;
> +       void **slot;
> +       unsigned int found;
> +       unsigned int node_cnt = 0, tree_cnt = 0;
> +
> +       if (available_free_memory(sbi, EXTENT_CACHE))
> +               return;
> +
> +       spin_lock(&sbi->extent_lock);
> +       list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
> +               if (!nr_shrink--)
> +                       break;
> +               list_del_init(&en->list);
> +       }
> +       spin_unlock(&sbi->extent_lock);
> +

IMHO, it's expensive to retrieve all extent_tree to free extent_node
that list_empty() is true.
Is there any idea to improve this?
For example, if each extent_node has its extent_root, it would be more
fast by not to retrieve all trees.
Of course, however, it uses more memory.

But, I think that your patchset might just as well be merged because
patches are well made and it's clearly separated with mount option. In
the next time, we could improve this.

Regards,
Changman

> +       down_read(&sbi->extent_tree_lock);
> +       while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
> +                               (void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
> +               unsigned i;
> +
> +               ino = treevec[found - 1]->ino + 1;
> +               for (i = 0; i < found; i++) {
> +                       struct extent_tree *et = treevec[i];
> +
> +                       atomic_inc(&et->refcount);
> +                       write_lock(&et->lock);
> +                       node_cnt += __free_extent_tree(sbi, et, false);
> +                       write_unlock(&et->lock);
> +                       atomic_dec(&et->refcount);
> +               }
> +       }
> +       up_read(&sbi->extent_tree_lock);
> +
> +       down_write(&sbi->extent_tree_lock);
> +       radix_tree_for_each_slot(slot, &sbi->extent_tree_root, &iter,
> +                                                       F2FS_ROOT_INO(sbi)) {
> +               struct extent_tree *et = (struct extent_tree *)*slot;
> +
> +               if (!atomic_read(&et->refcount) && !et->count) {
> +                       radix_tree_delete(&sbi->extent_tree_root, et->ino);
> +                       kmem_cache_free(extent_tree_slab, et);
> +                       sbi->total_ext_tree--;
> +                       tree_cnt++;
> +               }
> +       }
> +       up_write(&sbi->extent_tree_lock);
> +}
> +

~ snip ~

> --
> 2.2.1
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [f2fs-dev][RFC PATCH 06/10] f2fs: add core functions for rb-tree extent cache
  2015-01-20 15:06 ` Changman Lee
@ 2015-01-21  8:41   ` Chao Yu
  2015-01-21 22:25     ` Changman Lee
  0 siblings, 1 reply; 9+ messages in thread
From: Chao Yu @ 2015-01-21  8:41 UTC (permalink / raw)
  To: 'Changman Lee'
  Cc: 'Jaegeuk Kim', 'Changman Lee',
	linux-f2fs-devel, linux-kernel

Hi Changman,

> -----Original Message-----
> From: Changman Lee [mailto:cm224.lee@gmail.com]
> Sent: Tuesday, January 20, 2015 11:06 PM
> To: Chao Yu
> Cc: Jaegeuk Kim; Changman Lee; linux-f2fs-devel@lists.sourceforge.net;
> linux-kernel@vger.kernel.org
> Subject: Re: [f2fs-dev][RFC PATCH 06/10] f2fs: add core functions for rb-tree extent cache
> 
> Hi Chao,
> 
> Great works. :)

Thanks! :)

> 
> 2015-01-12 16:14 GMT+09:00 Chao Yu <chao2.yu@samsung.com>:
> > This patch adds core functions including slab cache init function and
> > init/lookup/update/shrink/destroy function for rb-tree based extent cache.
> >
> > Thank Jaegeuk Kim and Changman Lee as they gave much suggestion about detail
> > design and implementation of extent cache.
> >
> > Todo:
> >  * add a cached_ei into struct extent_tree for a quick recent cache.
> >  * register rb-based extent cache shrink with mm shrink interface.
> >  * disable dir inode's extent cache.
> >
> > Signed-off-by: Chao Yu <chao2.yu@samsung.com>
> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > Signed-off-by: Changman Lee <cm224.lee@samsung.com>

If you do not object, I'd like to keep this as lots of details and ideas are from
you and Jaegeuk.

> > ---
> >  fs/f2fs/data.c | 458 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  fs/f2fs/node.c |   9 +-
> >  2 files changed, 466 insertions(+), 1 deletion(-)
> >
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 4f5b871e..bf8c5eb 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -25,6 +25,9 @@
> >  #include "trace.h"
> >  #include <trace/events/f2fs.h>
> >
> 
> ~ snip ~
> 
> > +
> > +static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs,
> > +                                                       block_t blkaddr)
> > +{
> > +       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > +       nid_t ino = inode->i_ino;
> > +       struct extent_tree *et;
> > +       struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
> > +       struct extent_node *den = NULL;
> > +       struct extent_info *pei;
> > +       struct extent_info ei;
> > +       unsigned int endofs;
> > +
> > +       if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
> > +               return;
> > +
> > +retry:
> > +       down_write(&sbi->extent_tree_lock);
> > +       et = radix_tree_lookup(&sbi->extent_tree_root, ino);
> > +       if (!et) {
> 
> We've already made some useful functions.
> How about using f2fs_kmem_cache_alloc and f2fs_radix_tree_insert ?

IMO, we'd better to use original function kmem_cache_alloc and radix_tree_insert,
because if we use f2fs_{kmem_cache_alloc, radix_tree_insert}, we may loop in these
functions without releasing extent_tree_lock lock when OOM, so it will block lock
grabbers for long time which we do not wish to see.

> 
> > +               et = kmem_cache_alloc(extent_tree_slab, GFP_ATOMIC);
> > +               if (!et) {
> > +                       up_write(&sbi->extent_tree_lock);
> > +                       goto retry;
> > +               }
> > +               if (radix_tree_insert(&sbi->extent_tree_root, ino, et)) {
> > +                       up_write(&sbi->extent_tree_lock);
> > +                       kmem_cache_free(extent_tree_slab, et);
> > +                       goto retry;
> > +               }
> > +               memset(et, 0, sizeof(struct extent_tree));
> > +               et->ino = ino;
> > +               et->root = RB_ROOT;
> > +               rwlock_init(&et->lock);
> > +               atomic_set(&et->refcount, 0);
> > +               et->count = 0;
> > +               sbi->total_ext_tree++;
> > +       }
> > +       atomic_inc(&et->refcount);
> > +       up_write(&sbi->extent_tree_lock);
> > +
> 
> ~ snip ~
> 
> > +
> > +       write_unlock(&et->lock);
> > +       atomic_dec(&et->refcount);
> > +}
> > +
> > +void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
> > +{
> > +       struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
> > +       struct extent_node *en, *tmp;
> > +       unsigned long ino = F2FS_ROOT_INO(sbi);
> > +       struct radix_tree_iter iter;
> > +       void **slot;
> > +       unsigned int found;
> > +       unsigned int node_cnt = 0, tree_cnt = 0;
> > +
> > +       if (available_free_memory(sbi, EXTENT_CACHE))
> > +               return;
> > +
> > +       spin_lock(&sbi->extent_lock);
> > +       list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
> > +               if (!nr_shrink--)
> > +                       break;
> > +               list_del_init(&en->list);
> > +       }
> > +       spin_unlock(&sbi->extent_lock);
> > +
> 
> IMHO, it's expensive to retrieve all extent_tree to free extent_node
> that list_empty() is true.

Yes, it will cause heavy overhead to release extent_node in extent cache
which has huge number of extent_node.

> Is there any idea to improve this?
> For example, if each extent_node has its extent_root, it would be more
> fast by not to retrieve all trees.
> Of course, however, it uses more memory.

I think your solution is a good way to improve the performance.

> 
> But, I think that your patchset might just as well be merged because
> patches are well made and it's clearly separated with mount option. 

I hope so.

> In the next time, we could improve this.

There are also some thoughts in *todo* list, these can be added to developing list
if this patch set is applied.

Thanks for your review and suggestion! :)

Regards,
Yu

> 
> Regards,
> Changman
> 
> > +       down_read(&sbi->extent_tree_lock);
> > +       while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
> > +                               (void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
> > +               unsigned i;
> > +
> > +               ino = treevec[found - 1]->ino + 1;
> > +               for (i = 0; i < found; i++) {
> > +                       struct extent_tree *et = treevec[i];
> > +
> > +                       atomic_inc(&et->refcount);
> > +                       write_lock(&et->lock);
> > +                       node_cnt += __free_extent_tree(sbi, et, false);
> > +                       write_unlock(&et->lock);
> > +                       atomic_dec(&et->refcount);
> > +               }
> > +       }
> > +       up_read(&sbi->extent_tree_lock);
> > +
> > +       down_write(&sbi->extent_tree_lock);
> > +       radix_tree_for_each_slot(slot, &sbi->extent_tree_root, &iter,
> > +                                                       F2FS_ROOT_INO(sbi)) {
> > +               struct extent_tree *et = (struct extent_tree *)*slot;
> > +
> > +               if (!atomic_read(&et->refcount) && !et->count) {
> > +                       radix_tree_delete(&sbi->extent_tree_root, et->ino);
> > +                       kmem_cache_free(extent_tree_slab, et);
> > +                       sbi->total_ext_tree--;
> > +                       tree_cnt++;
> > +               }
> > +       }
> > +       up_write(&sbi->extent_tree_lock);
> > +}
> > +
> 
> ~ snip ~
> 
> > --
> > 2.2.1
> >
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at  http://www.tux.org/lkml/


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [f2fs-dev][RFC PATCH 06/10] f2fs: add core functions for rb-tree extent cache
  2015-01-21  8:41   ` Chao Yu
@ 2015-01-21 22:25     ` Changman Lee
  2015-01-22  1:32       ` Chao Yu
  0 siblings, 1 reply; 9+ messages in thread
From: Changman Lee @ 2015-01-21 22:25 UTC (permalink / raw)
  To: Chao Yu
  Cc: 'Changman Lee', 'Jaegeuk Kim',
	linux-f2fs-devel, linux-kernel

On Wed, Jan 21, 2015 at 04:41:17PM +0800, Chao Yu wrote:
> Hi Changman,
> 
> > -----Original Message-----
> > From: Changman Lee [mailto:cm224.lee@gmail.com]
> > Sent: Tuesday, January 20, 2015 11:06 PM
> > To: Chao Yu
> > Cc: Jaegeuk Kim; Changman Lee; linux-f2fs-devel@lists.sourceforge.net;
> > linux-kernel@vger.kernel.org
> > Subject: Re: [f2fs-dev][RFC PATCH 06/10] f2fs: add core functions for rb-tree extent cache
> > 
> > Hi Chao,
> > 
> > Great works. :)
> 
> Thanks! :)
> 
> > 
> > 2015-01-12 16:14 GMT+09:00 Chao Yu <chao2.yu@samsung.com>:
> > > This patch adds core functions including slab cache init function and
> > > init/lookup/update/shrink/destroy function for rb-tree based extent cache.
> > >
> > > Thank Jaegeuk Kim and Changman Lee as they gave much suggestion about detail
> > > design and implementation of extent cache.
> > >
> > > Todo:
> > >  * add a cached_ei into struct extent_tree for a quick recent cache.
> > >  * register rb-based extent cache shrink with mm shrink interface.
> > >  * disable dir inode's extent cache.
> > >
> > > Signed-off-by: Chao Yu <chao2.yu@samsung.com>
> > > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > > Signed-off-by: Changman Lee <cm224.lee@samsung.com>
> 
> If you do not object, I'd like to keep this as lots of details and ideas are from
> you and Jaegeuk.
> 

I have no objection.

> > > ---
> > >  fs/f2fs/data.c | 458 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> > >  fs/f2fs/node.c |   9 +-
> > >  2 files changed, 466 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > > index 4f5b871e..bf8c5eb 100644
> > > --- a/fs/f2fs/data.c
> > > +++ b/fs/f2fs/data.c
> > > @@ -25,6 +25,9 @@
> > >  #include "trace.h"
> > >  #include <trace/events/f2fs.h>
> > >
> > 
> > ~ snip ~
> > 
> > > +
> > > +static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs,
> > > +                                                       block_t blkaddr)
> > > +{
> > > +       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > > +       nid_t ino = inode->i_ino;
> > > +       struct extent_tree *et;
> > > +       struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
> > > +       struct extent_node *den = NULL;
> > > +       struct extent_info *pei;
> > > +       struct extent_info ei;
> > > +       unsigned int endofs;
> > > +
> > > +       if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
> > > +               return;
> > > +
> > > +retry:
> > > +       down_write(&sbi->extent_tree_lock);
> > > +       et = radix_tree_lookup(&sbi->extent_tree_root, ino);
> > > +       if (!et) {
> > 
> > We've already made some useful functions.
> > How about using f2fs_kmem_cache_alloc and f2fs_radix_tree_insert ?
> 
> IMO, we'd better to use original function kmem_cache_alloc and radix_tree_insert,
> because if we use f2fs_{kmem_cache_alloc, radix_tree_insert}, we may loop in these
> functions without releasing extent_tree_lock lock when OOM, so it will block lock
> grabbers for long time which we do not wish to see.
> 

I see. If so, let's use cond_resched() in front of goto retry after
up_write. And also look into kmem_cache_alloc in __insert_extent_tree, please.

> > 
> > > +               et = kmem_cache_alloc(extent_tree_slab, GFP_ATOMIC);
> > > +               if (!et) {
> > > +                       up_write(&sbi->extent_tree_lock);
> > > +                       goto retry;
> > > +               }
> > > +               if (radix_tree_insert(&sbi->extent_tree_root, ino, et)) {
> > > +                       up_write(&sbi->extent_tree_lock);
> > > +                       kmem_cache_free(extent_tree_slab, et);
> > > +                       goto retry;
> > > +               }
> > > +               memset(et, 0, sizeof(struct extent_tree));
> > > +               et->ino = ino;
> > > +               et->root = RB_ROOT;
> > > +               rwlock_init(&et->lock);
> > > +               atomic_set(&et->refcount, 0);
> > > +               et->count = 0;
> > > +               sbi->total_ext_tree++;
> > > +       }
> > > +       atomic_inc(&et->refcount);
> > > +       up_write(&sbi->extent_tree_lock);
> > > +
> > 
> > ~ snip ~
> > 
> > > +
> > > +       write_unlock(&et->lock);
> > > +       atomic_dec(&et->refcount);
> > > +}
> > > +
> > > +void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
> > > +{
> > > +       struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
> > > +       struct extent_node *en, *tmp;
> > > +       unsigned long ino = F2FS_ROOT_INO(sbi);
> > > +       struct radix_tree_iter iter;
> > > +       void **slot;
> > > +       unsigned int found;
> > > +       unsigned int node_cnt = 0, tree_cnt = 0;
> > > +
> > > +       if (available_free_memory(sbi, EXTENT_CACHE))
> > > +               return;
> > > +
> > > +       spin_lock(&sbi->extent_lock);
> > > +       list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
> > > +               if (!nr_shrink--)
> > > +                       break;
> > > +               list_del_init(&en->list);
> > > +       }
> > > +       spin_unlock(&sbi->extent_lock);
> > > +
> > 
> > IMHO, it's expensive to retrieve all extent_tree to free extent_node
> > that list_empty() is true.
> 
> Yes, it will cause heavy overhead to release extent_node in extent cache
> which has huge number of extent_node.
> 
> > Is there any idea to improve this?
> > For example, if each extent_node has its extent_root, it would be more
> > fast by not to retrieve all trees.
> > Of course, however, it uses more memory.
> 
> I think your solution is a good way to improve the performance.
> 
> > 
> > But, I think that your patchset might just as well be merged because
> > patches are well made and it's clearly separated with mount option. 
> 
> I hope so.
> 
> > In the next time, we could improve this.
> 
> There are also some thoughts in *todo* list, these can be added to developing list
> if this patch set is applied.

Cheers,
Changman

> 
> Thanks for your review and suggestion! :)
> 
> Regards,
> Yu
> 
> > 
> > Regards,
> > Changman
> > 
> > > +       down_read(&sbi->extent_tree_lock);
> > > +       while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
> > > +                               (void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
> > > +               unsigned i;
> > > +
> > > +               ino = treevec[found - 1]->ino + 1;
> > > +               for (i = 0; i < found; i++) {
> > > +                       struct extent_tree *et = treevec[i];
> > > +
> > > +                       atomic_inc(&et->refcount);
> > > +                       write_lock(&et->lock);
> > > +                       node_cnt += __free_extent_tree(sbi, et, false);
> > > +                       write_unlock(&et->lock);
> > > +                       atomic_dec(&et->refcount);
> > > +               }
> > > +       }
> > > +       up_read(&sbi->extent_tree_lock);
> > > +
> > > +       down_write(&sbi->extent_tree_lock);
> > > +       radix_tree_for_each_slot(slot, &sbi->extent_tree_root, &iter,
> > > +                                                       F2FS_ROOT_INO(sbi)) {
> > > +               struct extent_tree *et = (struct extent_tree *)*slot;
> > > +
> > > +               if (!atomic_read(&et->refcount) && !et->count) {
> > > +                       radix_tree_delete(&sbi->extent_tree_root, et->ino);
> > > +                       kmem_cache_free(extent_tree_slab, et);
> > > +                       sbi->total_ext_tree--;
> > > +                       tree_cnt++;
> > > +               }
> > > +       }
> > > +       up_write(&sbi->extent_tree_lock);
> > > +}
> > > +
> > 
> > ~ snip ~
> > 
> > > --
> > > 2.2.1
> > >
> > >
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [f2fs-dev][RFC PATCH 06/10] f2fs: add core functions for rb-tree extent cache
  2015-01-21 22:25     ` Changman Lee
@ 2015-01-22  1:32       ` Chao Yu
  0 siblings, 0 replies; 9+ messages in thread
From: Chao Yu @ 2015-01-22  1:32 UTC (permalink / raw)
  To: 'Changman Lee'
  Cc: 'Changman Lee', 'Jaegeuk Kim',
	linux-f2fs-devel, linux-kernel

Hi Changman,

> -----Original Message-----
> From: Changman Lee [mailto:cm224.lee@samsung.com]
> Sent: Thursday, January 22, 2015 6:26 AM
> To: Chao Yu
> Cc: 'Changman Lee'; 'Jaegeuk Kim'; linux-f2fs-devel@lists.sourceforge.net;
> linux-kernel@vger.kernel.org
> Subject: Re: [f2fs-dev][RFC PATCH 06/10] f2fs: add core functions for rb-tree extent cache
> 
> On Wed, Jan 21, 2015 at 04:41:17PM +0800, Chao Yu wrote:
> > Hi Changman,
> >
> > > -----Original Message-----
> > > From: Changman Lee [mailto:cm224.lee@gmail.com]
> > > Sent: Tuesday, January 20, 2015 11:06 PM
> > > To: Chao Yu
> > > Cc: Jaegeuk Kim; Changman Lee; linux-f2fs-devel@lists.sourceforge.net;
> > > linux-kernel@vger.kernel.org
> > > Subject: Re: [f2fs-dev][RFC PATCH 06/10] f2fs: add core functions for rb-tree extent cache
> > >
> > > Hi Chao,
> > >
> > > Great works. :)
> >
> > Thanks! :)
> >
> > >
> > > 2015-01-12 16:14 GMT+09:00 Chao Yu <chao2.yu@samsung.com>:
> > > > This patch adds core functions including slab cache init function and
> > > > init/lookup/update/shrink/destroy function for rb-tree based extent cache.
> > > >
> > > > Thank Jaegeuk Kim and Changman Lee as they gave much suggestion about detail
> > > > design and implementation of extent cache.
> > > >
> > > > Todo:
> > > >  * add a cached_ei into struct extent_tree for a quick recent cache.
> > > >  * register rb-based extent cache shrink with mm shrink interface.
> > > >  * disable dir inode's extent cache.
> > > >
> > > > Signed-off-by: Chao Yu <chao2.yu@samsung.com>
> > > > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > > > Signed-off-by: Changman Lee <cm224.lee@samsung.com>
> >
> > If you do not object, I'd like to keep this as lots of details and ideas are from
> > you and Jaegeuk.
> >
> 
> I have no objection.
> 
> > > > ---
> > > >  fs/f2fs/data.c | 458 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> > > >  fs/f2fs/node.c |   9 +-
> > > >  2 files changed, 466 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > > > index 4f5b871e..bf8c5eb 100644
> > > > --- a/fs/f2fs/data.c
> > > > +++ b/fs/f2fs/data.c
> > > > @@ -25,6 +25,9 @@
> > > >  #include "trace.h"
> > > >  #include <trace/events/f2fs.h>
> > > >
> > >
> > > ~ snip ~
> > >
> > > > +
> > > > +static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs,
> > > > +                                                       block_t blkaddr)
> > > > +{
> > > > +       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > > > +       nid_t ino = inode->i_ino;
> > > > +       struct extent_tree *et;
> > > > +       struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
> > > > +       struct extent_node *den = NULL;
> > > > +       struct extent_info *pei;
> > > > +       struct extent_info ei;
> > > > +       unsigned int endofs;
> > > > +
> > > > +       if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
> > > > +               return;
> > > > +
> > > > +retry:
> > > > +       down_write(&sbi->extent_tree_lock);
> > > > +       et = radix_tree_lookup(&sbi->extent_tree_root, ino);
> > > > +       if (!et) {
> > >
> > > We've already made some useful functions.
> > > How about using f2fs_kmem_cache_alloc and f2fs_radix_tree_insert ?
> >
> > IMO, we'd better to use original function kmem_cache_alloc and radix_tree_insert,
> > because if we use f2fs_{kmem_cache_alloc, radix_tree_insert}, we may loop in these
> > functions without releasing extent_tree_lock lock when OOM, so it will block lock
> > grabbers for long time which we do not wish to see.
> >
> 
> I see. If so, let's use cond_resched() in front of goto retry after up_write.

Ah, yes, I will do that.

> And also look into kmem_cache_alloc in __insert_extent_tree, please.

I notice the problem, I'd like return NULL in __insert_extent_tree if we fail to 
alloc extent_node space with extent_node_slab.

Thank you for reminding me this!

Regards,
Yu

> 
> > >
> > > > +               et = kmem_cache_alloc(extent_tree_slab, GFP_ATOMIC);
> > > > +               if (!et) {
> > > > +                       up_write(&sbi->extent_tree_lock);
> > > > +                       goto retry;
> > > > +               }
> > > > +               if (radix_tree_insert(&sbi->extent_tree_root, ino, et)) {
> > > > +                       up_write(&sbi->extent_tree_lock);
> > > > +                       kmem_cache_free(extent_tree_slab, et);
> > > > +                       goto retry;
> > > > +               }
> > > > +               memset(et, 0, sizeof(struct extent_tree));
> > > > +               et->ino = ino;
> > > > +               et->root = RB_ROOT;
> > > > +               rwlock_init(&et->lock);
> > > > +               atomic_set(&et->refcount, 0);
> > > > +               et->count = 0;
> > > > +               sbi->total_ext_tree++;
> > > > +       }
> > > > +       atomic_inc(&et->refcount);
> > > > +       up_write(&sbi->extent_tree_lock);
> > > > +
> > >
> > > ~ snip ~
> > >
> > > > +
> > > > +       write_unlock(&et->lock);
> > > > +       atomic_dec(&et->refcount);
> > > > +}
> > > > +
> > > > +void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
> > > > +{
> > > > +       struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
> > > > +       struct extent_node *en, *tmp;
> > > > +       unsigned long ino = F2FS_ROOT_INO(sbi);
> > > > +       struct radix_tree_iter iter;
> > > > +       void **slot;
> > > > +       unsigned int found;
> > > > +       unsigned int node_cnt = 0, tree_cnt = 0;
> > > > +
> > > > +       if (available_free_memory(sbi, EXTENT_CACHE))
> > > > +               return;
> > > > +
> > > > +       spin_lock(&sbi->extent_lock);
> > > > +       list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
> > > > +               if (!nr_shrink--)
> > > > +                       break;
> > > > +               list_del_init(&en->list);
> > > > +       }
> > > > +       spin_unlock(&sbi->extent_lock);
> > > > +
> > >
> > > IMHO, it's expensive to retrieve all extent_tree to free extent_node
> > > that list_empty() is true.
> >
> > Yes, it will cause heavy overhead to release extent_node in extent cache
> > which has huge number of extent_node.
> >
> > > Is there any idea to improve this?
> > > For example, if each extent_node has its extent_root, it would be more
> > > fast by not to retrieve all trees.
> > > Of course, however, it uses more memory.
> >
> > I think your solution is a good way to improve the performance.
> >
> > >
> > > But, I think that your patchset might just as well be merged because
> > > patches are well made and it's clearly separated with mount option.
> >
> > I hope so.
> >
> > > In the next time, we could improve this.
> >
> > There are also some thoughts in *todo* list, these can be added to developing list
> > if this patch set is applied.
> 
> Cheers,
> Changman
> 
> >
> > Thanks for your review and suggestion! :)
> >
> > Regards,
> > Yu
> >
> > >
> > > Regards,
> > > Changman
> > >
> > > > +       down_read(&sbi->extent_tree_lock);
> > > > +       while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
> > > > +                               (void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
> > > > +               unsigned i;
> > > > +
> > > > +               ino = treevec[found - 1]->ino + 1;
> > > > +               for (i = 0; i < found; i++) {
> > > > +                       struct extent_tree *et = treevec[i];
> > > > +
> > > > +                       atomic_inc(&et->refcount);
> > > > +                       write_lock(&et->lock);
> > > > +                       node_cnt += __free_extent_tree(sbi, et, false);
> > > > +                       write_unlock(&et->lock);
> > > > +                       atomic_dec(&et->refcount);
> > > > +               }
> > > > +       }
> > > > +       up_read(&sbi->extent_tree_lock);
> > > > +
> > > > +       down_write(&sbi->extent_tree_lock);
> > > > +       radix_tree_for_each_slot(slot, &sbi->extent_tree_root, &iter,
> > > > +                                                       F2FS_ROOT_INO(sbi)) {
> > > > +               struct extent_tree *et = (struct extent_tree *)*slot;
> > > > +
> > > > +               if (!atomic_read(&et->refcount) && !et->count) {
> > > > +                       radix_tree_delete(&sbi->extent_tree_root, et->ino);
> > > > +                       kmem_cache_free(extent_tree_slab, et);
> > > > +                       sbi->total_ext_tree--;
> > > > +                       tree_cnt++;
> > > > +               }
> > > > +       }
> > > > +       up_write(&sbi->extent_tree_lock);
> > > > +}
> > > > +
> > >
> > > ~ snip ~
> > >
> > > > --
> > > > 2.2.1
> > > >
> > > >
> > > > --
> > > > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > > > the body of a message to majordomo@vger.kernel.org
> > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > Please read the FAQ at  http://www.tux.org/lkml/


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [f2fs-dev][RFC PATCH 06/10] f2fs: add core functions for rb-tree extent cache
  2015-01-12  7:14 [f2fs-dev][RFC PATCH 06/10] f2fs: add core functions for rb-tree extent cache Chao Yu
  2015-01-20 15:06 ` Changman Lee
@ 2015-01-23  1:48 ` Jaegeuk Kim
  2015-01-23  6:15   ` Chao Yu
  1 sibling, 1 reply; 9+ messages in thread
From: Jaegeuk Kim @ 2015-01-23  1:48 UTC (permalink / raw)
  To: Chao Yu; +Cc: Changman Lee, linux-f2fs-devel, linux-kernel

On Mon, Jan 12, 2015 at 03:14:48PM +0800, Chao Yu wrote:
> This patch adds core functions including slab cache init function and
> init/lookup/update/shrink/destroy function for rb-tree based extent cache.
> 
> Thank Jaegeuk Kim and Changman Lee as they gave much suggestion about detail
> design and implementation of extent cache.
> 
> Todo:
>  * add a cached_ei into struct extent_tree for a quick recent cache.
>  * register rb-based extent cache shrink with mm shrink interface.
>  * disable dir inode's extent cache.
> 
> Signed-off-by: Chao Yu <chao2.yu@samsung.com>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> Signed-off-by: Changman Lee <cm224.lee@samsung.com>
> ---
>  fs/f2fs/data.c | 458 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/f2fs/node.c |   9 +-
>  2 files changed, 466 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 4f5b871e..bf8c5eb 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -25,6 +25,9 @@
>  #include "trace.h"
>  #include <trace/events/f2fs.h>
>  
> +struct kmem_cache *extent_tree_slab;
> +struct kmem_cache *extent_node_slab;
> +
>  static void f2fs_read_end_io(struct bio *bio, int err)
>  {
>  	struct bio_vec *bvec;
> @@ -373,6 +376,430 @@ end_update:
>  	return need_update;
>  }
>  
> +static struct extent_node *__lookup_extent_tree(struct extent_tree *et,
> +							unsigned int fofs)
> +{
> +	struct rb_node *node = et->root.rb_node;
> +	struct extent_node *en;
> +
> +	while (node) {
> +		en = rb_entry(node, struct extent_node, rb_node);
> +		if (fofs < en->ei.fofs)
> +			node = node->rb_left;
> +		else if (fofs >= en->ei.fofs + en->ei.len)
> +			node = node->rb_right;
> +		else
> +			return en;
> +	}
> +	return NULL;
> +}
> +
> +static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
> +						u32 blk, unsigned int len)
> +{
> +	ei->fofs = fofs;
> +	ei->blk = blk;
> +	ei->len = len;
> +}
> +
> +static inline bool __is_extent_mergeable(struct extent_info *back,
> +						struct extent_info *front)
> +{
> +	return (back->fofs + back->len == front->fofs &&
> +			back->blk + back->len == front->blk);
> +}
> +
> +static bool __is_back_mergeable(struct extent_info *cur,
> +						struct extent_info *back)
> +{
> +	return __is_extent_mergeable(back, cur);
> +}
> +
> +static bool __is_front_mergeable(struct extent_info *cur,
> +						struct extent_info *front)
> +{
> +	return __is_extent_mergeable(cur, front);
> +}


How about declaring these four functions as inline ones and locating them
inside f2fs.h?

> +
> +static struct extent_node *__try_back_merge(struct extent_tree *et,
> +						struct extent_node *en)
> +{
> +	struct extent_node *prev;
> +	struct rb_node *node;
> +
> +	node = rb_prev(&en->rb_node);
> +	if (!node)
> +		return NULL;
> +
> +	prev = rb_entry(node, struct extent_node, rb_node);
> +	if (__is_back_mergeable(&en->ei, &prev->ei)) {
> +		en->ei.fofs = prev->ei.fofs;
> +		en->ei.blk = prev->ei.blk;
> +		en->ei.len += prev->ei.len;
> +		rb_erase(&prev->rb_node, &et->root);
> +		et->count--;
> +		return prev;
> +	}
> +	return NULL;
> +}
> +
> +static struct extent_node *__try_front_merge(struct extent_tree *et,
> +						struct extent_node *en)
> +{
> +	struct extent_node *next;
> +	struct rb_node *node;
> +
> +	node = rb_next(&en->rb_node);
> +	if (!node)
> +		return NULL;
> +
> +	next = rb_entry(node, struct extent_node, rb_node);
> +	if (__is_front_mergeable(&en->ei, &next->ei)) {
> +		en->ei.len += next->ei.len;
> +		rb_erase(&next->rb_node, &et->root);
> +		et->count--;
> +		return next;
> +	}
> +	return NULL;
> +}
> +
> +static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
> +				struct extent_tree *et, struct extent_info *ei,
> +				struct extent_node **den)
> +{
> +	struct rb_node **p = &et->root.rb_node;
> +	struct rb_node *parent = NULL;
> +	struct extent_node *en;
> +
> +	while (*p) {
> +		parent = *p;
> +		en = rb_entry(parent, struct extent_node, rb_node);
> +
> +		if (ei->fofs < en->ei.fofs) {
> +			if (__is_front_mergeable(ei, &en->ei)) {
> +				f2fs_bug_on(sbi, !den);
> +				en->ei.fofs = ei->fofs;
> +				en->ei.blk = ei->blk;
> +				en->ei.len += ei->len;
> +				*den = __try_back_merge(et, en);
> +				return en;
> +			}
> +			p = &(*p)->rb_left;
> +		} else if (ei->fofs >= en->ei.fofs + en->ei.len) {
> +			if (__is_back_mergeable(ei, &en->ei)) {
> +				f2fs_bug_on(sbi, !den);
> +				en->ei.len += ei->len;
> +				*den = __try_front_merge(et, en);
> +				return en;
> +			}
> +			p = &(*p)->rb_right;
> +		} else
> +			f2fs_bug_on(F2FS_I_SB(inode), 1);

Coding style.

} else {
	...
}

> +	}

How about adding __attach_extent_node()?
{
> +
> +	en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);

	en = f2fs_kmem_cache_alloc(.., GFP_ATOMIC);

> +	en->ei = *ei;
> +	INIT_LIST_HEAD(&en->list);
> +
> +	rb_link_node(&en->rb_node, parent, p);
> +	rb_insert_color(&en->rb_node, &et->root);
> +	atomic_inc(&sbi->total_ext_node);
> +	et->count++;

}

> +
> +	return en;
> +}
> +
> +static struct extent_node *__remove_extent_tree(struct extent_tree *et,
> +							unsigned int fofs)

This is __detach_extent_node()?

> +{
> +	struct rb_node *p = et->root.rb_node;
> +	struct extent_node *en;
> +
> +	while (p) {
> +		en = rb_entry(p, struct extent_node, rb_node);
> +
> +		if (fofs < en->ei.fofs)
> +			p = p->rb_left;

Coding style.

if () {
} else {
}

> +		else if (fofs >= en->ei.fofs + en->ei.len)
> +			p = p->rb_right;
> +		else {
> +			rb_erase(&en->rb_node, &et->root);
> +			et->count--;

Add here?
			atomic_dec(&sbi->total_ext_node);

> +			return en;
> +		}
> +	}
> +	return NULL;
> +}
> +
> +static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
> +					struct extent_tree *et, bool free_all)
> +{
> +	struct rb_node *node, *next;
> +	struct extent_node *en;
> +	unsigned int count = et->count;
> +
> +	node = rb_first(&et->root);
> +	while (node) {
> +		next = rb_next(node);
> +		en = rb_entry(node, struct extent_node, rb_node);
> +
> +		if (free_all) {
> +			spin_lock(&sbi->extent_lock);
> +			if (!list_empty(&en->list))
> +				list_del_init(&en->list);
> +			spin_unlock(&sbi->extent_lock);
> +		}
> +
> +		if (free_all || list_empty(&en->list)) {
> +			rb_erase(node, &et->root);
> +			kmem_cache_free(extent_node_slab, en);
> +			atomic_dec(&sbi->total_ext_node);
> +			et->count--;
> +		}
> +		node = next;
> +	}
> +
> +	return count - et->count;
> +}
> +
> +static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
> +							struct extent_info *ei)
> +{
> +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> +	struct extent_tree *et;
> +	struct extent_node *en;
> +
> +	if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
> +		return false;
> +
> +	down_read(&sbi->extent_tree_lock);
> +	et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino);
> +	if (!et) {
> +		up_read(&sbi->extent_tree_lock);
> +		return false;
> +	}
> +	atomic_inc(&et->refcount);
> +	up_read(&sbi->extent_tree_lock);
> +
> +	read_lock(&et->lock);
> +	en = __lookup_extent_tree(et, pgofs);
> +	if (en) {
> +		*ei = en->ei;
> +		spin_lock(&sbi->extent_lock);
> +		if (!list_empty(&en->list))
> +			list_move_tail(&en->list, &sbi->extent_list);
> +		spin_unlock(&sbi->extent_lock);
> +		stat_inc_read_hit(sbi->sb);
> +	}
> +	stat_inc_total_hit(sbi->sb);
> +	read_unlock(&et->lock);
> +
> +	atomic_dec(&et->refcount);
> +	return en ? true : false;
> +}
> +
> +static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs,
> +							block_t blkaddr)
> +{
> +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> +	nid_t ino = inode->i_ino;
> +	struct extent_tree *et;
> +	struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
> +	struct extent_node *den = NULL;
> +	struct extent_info *pei;
> +	struct extent_info ei;
> +	unsigned int endofs;
> +
> +	if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
> +		return;
> +
> +retry:
> +	down_write(&sbi->extent_tree_lock);
> +	et = radix_tree_lookup(&sbi->extent_tree_root, ino);
> +	if (!et) {
> +		et = kmem_cache_alloc(extent_tree_slab, GFP_ATOMIC);

et = f2fs_kmem_cache_alloc(.., GFP_ATOMIC);

> +		if (!et) {
> +			up_write(&sbi->extent_tree_lock);
> +			goto retry;
> +		}
> +		if (radix_tree_insert(&sbi->extent_tree_root, ino, et)) {
> +			up_write(&sbi->extent_tree_lock);
> +			kmem_cache_free(extent_tree_slab, et);
> +			goto retry;
> +		}
> +		memset(et, 0, sizeof(struct extent_tree));
> +		et->ino = ino;
> +		et->root = RB_ROOT;
> +		rwlock_init(&et->lock);
> +		atomic_set(&et->refcount, 0);
> +		et->count = 0;
> +		sbi->total_ext_tree++;
> +	}
> +	atomic_inc(&et->refcount);
> +	up_write(&sbi->extent_tree_lock);
> +
> +	write_lock(&et->lock);
> +
> +	/* 1. lookup and remove exist extent info in cache */

                                existing

> +	en = __remove_extent_tree(et, fofs);

en = __detach_extent_node();?

> +	if (!en)
> +		goto update_extent;
> +
> +	pei = &en->ei;
> +	/* 2. if extent can be split more, split and insert the left part */
> +	if (pei->len > 1) {
> +		/*  insert left part of split extent into cache */
> +		if (pei->fofs < fofs) {
> +			set_extent_info(&ei, pei->fofs, pei->blk,
> +							fofs - pei->fofs);
> +			en1 = __insert_extent_tree(sbi, et, &ei, NULL);
> +		}
> +
> +		/* insert right part of split extent into cache */
> +		endofs = pei->fofs + pei->len - 1;
> +		if (endofs > fofs) {
> +			set_extent_info(&ei, fofs + 1,
> +				fofs - pei->fofs + pei->blk, endofs - fofs);
> +			en2 = __insert_extent_tree(sbi, et, &ei, NULL);
> +		}
> +	}
> +
> +update_extent:
> +	/* 3. update extent in extent cache */
> +	if (blkaddr) {
> +		set_extent_info(&ei, fofs, blkaddr, 1);
> +		en3 = __insert_extent_tree(sbi, et, &ei, &den);
> +	}
> +
> +	/* 4. update in global extent list */
> +	spin_lock(&sbi->extent_lock);
> +	if (en && !list_empty(&en->list))
> +		list_del_init(&en->list);
> +	/*
> +	 * en1 and en2 split from en, they will become more and more smaller
> +	 * fragments after splitting several times. So if the length is smaller
> +	 * than F2FS_MIN_EXTENT_LEN, we will not add them into extent_list,
> +	 * but just waiting shrinker to free them for reclaiming when OOM.
> +	 */

Can we just remove en1 and en2 in __insert_extent_tree?

> +	if (en1 && en1->ei.len >= F2FS_MIN_EXTENT_LEN)
> +		list_add_tail(&en1->list, &sbi->extent_list);
> +	if (en2 && en2->ei.len >= F2FS_MIN_EXTENT_LEN)
> +		list_add_tail(&en2->list, &sbi->extent_list);
> +	if (en3) {
> +		if (list_empty(&en3->list))
> +			list_add_tail(&en3->list, &sbi->extent_list);
> +		else
> +			list_move_tail(&en3->list, &sbi->extent_list);
> +	}
> +	if (den && !list_empty(&den->list))
> +		list_del_init(&den->list);
> +	spin_unlock(&sbi->extent_lock);
> +
> +	if (en) {
> +		kmem_cache_free(extent_node_slab, en);
> +		atomic_dec(&sbi->total_ext_node);

		--> move into __detach_extent_node().

> +	}
> +	if (den) {
> +		kmem_cache_free(extent_node_slab, den);
> +		atomic_dec(&sbi->total_ext_node);
> +	}
> +
> +	write_unlock(&et->lock);
> +	atomic_dec(&et->refcount);
> +}
> +
> +void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
> +{
> +	struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
> +	struct extent_node *en, *tmp;
> +	unsigned long ino = F2FS_ROOT_INO(sbi);
> +	struct radix_tree_iter iter;
> +	void **slot;
> +	unsigned int found;
> +	unsigned int node_cnt = 0, tree_cnt = 0;
> +
> +	if (available_free_memory(sbi, EXTENT_CACHE))
> +		return;
> +
> +	spin_lock(&sbi->extent_lock);
> +	list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
> +		if (!nr_shrink--)
> +			break;
> +		list_del_init(&en->list);
> +	}
> +	spin_unlock(&sbi->extent_lock);
> +
> +	down_read(&sbi->extent_tree_lock);
> +	while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
> +				(void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
> +		unsigned i;
> +
> +		ino = treevec[found - 1]->ino + 1;
> +		for (i = 0; i < found; i++) {
> +			struct extent_tree *et = treevec[i];
> +
> +			atomic_inc(&et->refcount);
> +			write_lock(&et->lock);
> +			node_cnt += __free_extent_tree(sbi, et, false);
> +			write_unlock(&et->lock);
> +			atomic_dec(&et->refcount);
> +		}
> +	}
> +	up_read(&sbi->extent_tree_lock);
> +
> +	down_write(&sbi->extent_tree_lock);
> +	radix_tree_for_each_slot(slot, &sbi->extent_tree_root, &iter,
> +							F2FS_ROOT_INO(sbi)) {
> +		struct extent_tree *et = (struct extent_tree *)*slot;
> +
> +		if (!atomic_read(&et->refcount) && !et->count) {
> +			radix_tree_delete(&sbi->extent_tree_root, et->ino);
> +			kmem_cache_free(extent_tree_slab, et);
> +			sbi->total_ext_tree--;
> +			tree_cnt++;

No use of tree_cnt.

Thanks,

> +		}
> +	}
> +	up_write(&sbi->extent_tree_lock);
> +}
> +
> +void f2fs_destroy_extent_tree(struct inode *inode)
> +{
> +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> +	struct extent_tree *et;
> +	unsigned int node_cnt = 0;
> +
> +	down_read(&sbi->extent_tree_lock);
> +	et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino);
> +	if (!et) {
> +		up_read(&sbi->extent_tree_lock);
> +		goto out;
> +	}
> +	atomic_inc(&et->refcount);
> +	up_read(&sbi->extent_tree_lock);
> +
> +	/* free all extent info belong to this extent tree */
> +	write_lock(&et->lock);
> +	node_cnt = __free_extent_tree(sbi, et, true);
> +	write_unlock(&et->lock);
> +
> +	atomic_dec(&et->refcount);
> +
> +	/* try to find and delete extent tree entry in radix tree */
> +	down_write(&sbi->extent_tree_lock);
> +	et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino);
> +	if (!et) {
> +		up_write(&sbi->extent_tree_lock);
> +		goto out;
> +	}
> +	f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count);
> +	radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
> +	kmem_cache_free(extent_tree_slab, et);
> +	sbi->total_ext_tree--;
> +	up_write(&sbi->extent_tree_lock);
> +out:
> +	return;
> +}
> +
>  static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
>  							struct extent_info *ei)
>  {
> @@ -1198,6 +1625,37 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
>  	return generic_block_bmap(mapping, block, get_data_block);
>  }
>  
> +void init_extent_cache_info(struct f2fs_sb_info *sbi)
> +{
> +	INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOFS);
> +	init_rwsem(&sbi->extent_tree_lock);
> +	INIT_LIST_HEAD(&sbi->extent_list);
> +	spin_lock_init(&sbi->extent_lock);
> +	sbi->total_ext_tree = 0;
> +	atomic_set(&sbi->total_ext_node, 0);
> +}
> +
> +int __init create_extent_cache(void)
> +{
> +	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
> +			sizeof(struct extent_tree));
> +	if (!extent_tree_slab)
> +		return -ENOMEM;
> +	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
> +			sizeof(struct extent_node));
> +	if (!extent_node_slab) {
> +		kmem_cache_destroy(extent_tree_slab);
> +		return -ENOMEM;
> +	}
> +	return 0;
> +}
> +
> +void destroy_extent_cache(void)
> +{
> +	kmem_cache_destroy(extent_node_slab);
> +	kmem_cache_destroy(extent_tree_slab);
> +}
> +
>  const struct address_space_operations f2fs_dblock_aops = {
>  	.readpage	= f2fs_read_data_page,
>  	.readpages	= f2fs_read_data_pages,
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index d7c1436..387017f 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -41,7 +41,9 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
>  	/* only uses low memory */
>  	avail_ram = val.totalram - val.totalhigh;
>  
> -	/* give 25%, 25%, 50%, 50% memory for each components respectively */
> +	/*
> +	 * give 25%, 25%, 50%, 50%, 50% memory for each components respectively
> +	 */
>  	if (type == FREE_NIDS) {
>  		mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >>
>  							PAGE_CACHE_SHIFT;
> @@ -62,6 +64,11 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
>  			mem_size += (sbi->im[i].ino_num *
>  				sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT;
>  		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
> +	} else if (type == EXTENT_CACHE) {
> +		mem_size = (sbi->total_ext_tree * sizeof(struct extent_tree) +
> +				atomic_read(&sbi->total_ext_node) *
> +				sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT;
> +		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
>  	} else {
>  		if (sbi->sb->s_bdi->dirty_exceeded)
>  			return false;
> -- 
> 2.2.1

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [f2fs-dev][RFC PATCH 06/10] f2fs: add core functions for rb-tree extent cache
  2015-01-23  1:48 ` Jaegeuk Kim
@ 2015-01-23  6:15   ` Chao Yu
  2015-01-23 19:43     ` Jaegeuk Kim
  0 siblings, 1 reply; 9+ messages in thread
From: Chao Yu @ 2015-01-23  6:15 UTC (permalink / raw)
  To: 'Jaegeuk Kim'
  Cc: 'Changman Lee', linux-f2fs-devel, linux-kernel

Hi Jaegeuk,

> -----Original Message-----
> From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> Sent: Friday, January 23, 2015 9:48 AM
> To: Chao Yu
> Cc: Changman Lee; linux-f2fs-devel@lists.sourceforge.net; linux-kernel@vger.kernel.org
> Subject: Re: [f2fs-dev][RFC PATCH 06/10] f2fs: add core functions for rb-tree extent cache
> 
> On Mon, Jan 12, 2015 at 03:14:48PM +0800, Chao Yu wrote:
> > This patch adds core functions including slab cache init function and
> > init/lookup/update/shrink/destroy function for rb-tree based extent cache.
> >
> > Thank Jaegeuk Kim and Changman Lee as they gave much suggestion about detail
> > design and implementation of extent cache.
> >
> > Todo:
> >  * add a cached_ei into struct extent_tree for a quick recent cache.
> >  * register rb-based extent cache shrink with mm shrink interface.
> >  * disable dir inode's extent cache.
> >
> > Signed-off-by: Chao Yu <chao2.yu@samsung.com>
> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > Signed-off-by: Changman Lee <cm224.lee@samsung.com>
> > ---
> >  fs/f2fs/data.c | 458 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  fs/f2fs/node.c |   9 +-
> >  2 files changed, 466 insertions(+), 1 deletion(-)
> >
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 4f5b871e..bf8c5eb 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -25,6 +25,9 @@
> >  #include "trace.h"
> >  #include <trace/events/f2fs.h>
> >
> > +struct kmem_cache *extent_tree_slab;
> > +struct kmem_cache *extent_node_slab;
> > +
> >  static void f2fs_read_end_io(struct bio *bio, int err)
> >  {
> >  	struct bio_vec *bvec;
> > @@ -373,6 +376,430 @@ end_update:
> >  	return need_update;
> >  }
> >
> > +static struct extent_node *__lookup_extent_tree(struct extent_tree *et,
> > +							unsigned int fofs)
> > +{
> > +	struct rb_node *node = et->root.rb_node;
> > +	struct extent_node *en;
> > +
> > +	while (node) {
> > +		en = rb_entry(node, struct extent_node, rb_node);
> > +		if (fofs < en->ei.fofs)
> > +			node = node->rb_left;
> > +		else if (fofs >= en->ei.fofs + en->ei.len)
> > +			node = node->rb_right;
> > +		else
> > +			return en;
> > +	}
> > +	return NULL;
> > +}
> > +
> > +static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
> > +						u32 blk, unsigned int len)
> > +{
> > +	ei->fofs = fofs;
> > +	ei->blk = blk;
> > +	ei->len = len;
> > +}
> > +
> > +static inline bool __is_extent_mergeable(struct extent_info *back,
> > +						struct extent_info *front)
> > +{
> > +	return (back->fofs + back->len == front->fofs &&
> > +			back->blk + back->len == front->blk);
> > +}
> > +
> > +static bool __is_back_mergeable(struct extent_info *cur,
> > +						struct extent_info *back)
> > +{
> > +	return __is_extent_mergeable(back, cur);
> > +}
> > +
> > +static bool __is_front_mergeable(struct extent_info *cur,
> > +						struct extent_info *front)
> > +{
> > +	return __is_extent_mergeable(cur, front);
> > +}
> 
> 
> How about declaring these four functions as inline ones and locating them
> inside f2fs.h?

Good idea! Will do.

> 
> > +
> > +static struct extent_node *__try_back_merge(struct extent_tree *et,
> > +						struct extent_node *en)
> > +{
> > +	struct extent_node *prev;
> > +	struct rb_node *node;
> > +
> > +	node = rb_prev(&en->rb_node);
> > +	if (!node)
> > +		return NULL;
> > +
> > +	prev = rb_entry(node, struct extent_node, rb_node);
> > +	if (__is_back_mergeable(&en->ei, &prev->ei)) {
> > +		en->ei.fofs = prev->ei.fofs;
> > +		en->ei.blk = prev->ei.blk;
> > +		en->ei.len += prev->ei.len;
> > +		rb_erase(&prev->rb_node, &et->root);
> > +		et->count--;
> > +		return prev;
> > +	}
> > +	return NULL;
> > +}
> > +
> > +static struct extent_node *__try_front_merge(struct extent_tree *et,
> > +						struct extent_node *en)
> > +{
> > +	struct extent_node *next;
> > +	struct rb_node *node;
> > +
> > +	node = rb_next(&en->rb_node);
> > +	if (!node)
> > +		return NULL;
> > +
> > +	next = rb_entry(node, struct extent_node, rb_node);
> > +	if (__is_front_mergeable(&en->ei, &next->ei)) {
> > +		en->ei.len += next->ei.len;
> > +		rb_erase(&next->rb_node, &et->root);
> > +		et->count--;
> > +		return next;
> > +	}
> > +	return NULL;
> > +}
> > +
> > +static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
> > +				struct extent_tree *et, struct extent_info *ei,
> > +				struct extent_node **den)
> > +{
> > +	struct rb_node **p = &et->root.rb_node;
> > +	struct rb_node *parent = NULL;
> > +	struct extent_node *en;
> > +
> > +	while (*p) {
> > +		parent = *p;
> > +		en = rb_entry(parent, struct extent_node, rb_node);
> > +
> > +		if (ei->fofs < en->ei.fofs) {
> > +			if (__is_front_mergeable(ei, &en->ei)) {
> > +				f2fs_bug_on(sbi, !den);
> > +				en->ei.fofs = ei->fofs;
> > +				en->ei.blk = ei->blk;
> > +				en->ei.len += ei->len;
> > +				*den = __try_back_merge(et, en);
> > +				return en;
> > +			}
> > +			p = &(*p)->rb_left;
> > +		} else if (ei->fofs >= en->ei.fofs + en->ei.len) {
> > +			if (__is_back_mergeable(ei, &en->ei)) {
> > +				f2fs_bug_on(sbi, !den);
> > +				en->ei.len += ei->len;
> > +				*den = __try_front_merge(et, en);
> > +				return en;
> > +			}
> > +			p = &(*p)->rb_right;
> > +		} else
> > +			f2fs_bug_on(F2FS_I_SB(inode), 1);
> 
> Coding style.
> 
> } else {
> 	...
> }

will fix.

> 
> > +	}
> 
> How about adding __attach_extent_node()?

That's more readable.

How do you think of the following functions:

struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct rb_node *parent, struct rb_node **p)
{
	struct extent_node *en;

	en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
	if (!en)
		return NULL;

	en->ei = *ei;
	INIT_LIST_HEAD(&en->list);

	rb_link_node(&en->rb_node, parent, p);
	rb_insert_color(&en->rb_node, &et->root);
	et->count++;
	atomic_inc(&sbi->total_ext_node);
	return en;
}

static void __detach_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_node *en)
{
	rb_erase(&en->rb_node, &et->root);
	et->count--;
	atomic_dec(&sbi->total_ext_node);
}

> {
> > +
> > +	en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
> 
> 	en = f2fs_kmem_cache_alloc(.., GFP_ATOMIC);

We should avoid cond_resched() in f2fs_kmem_cache_alloc when we are holding write_lock.

IMO, it's better to return NULL if we fail to alloc extent_node here.
Otherwise we'd better alloc extent_node before write_lock.

> 
> > +	en->ei = *ei;
> > +	INIT_LIST_HEAD(&en->list);
> > +
> > +	rb_link_node(&en->rb_node, parent, p);
> > +	rb_insert_color(&en->rb_node, &et->root);
> > +	atomic_inc(&sbi->total_ext_node);
> > +	et->count++;
> 
> }
> 
> > +
> > +	return en;
> > +}
> > +
> > +static struct extent_node *__remove_extent_tree(struct extent_tree *et,
> > +							unsigned int fofs)
> 
> This is __detach_extent_node()?
> 
> > +{
> > +	struct rb_node *p = et->root.rb_node;
> > +	struct extent_node *en;
> > +
> > +	while (p) {
> > +		en = rb_entry(p, struct extent_node, rb_node);
> > +
> > +		if (fofs < en->ei.fofs)
> > +			p = p->rb_left;
> 
> Coding style.
> 
> if () {
> } else {
> }

will fix.

> 
> > +		else if (fofs >= en->ei.fofs + en->ei.len)
> > +			p = p->rb_right;
> > +		else {
> > +			rb_erase(&en->rb_node, &et->root);
> > +			et->count--;
> 
> Add here?
> 			atomic_dec(&sbi->total_ext_node);

Agree, see __detach_extent_node() implementation above.

> 
> > +			return en;
> > +		}
> > +	}
> > +	return NULL;
> > +}
> > +
> > +static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
> > +					struct extent_tree *et, bool free_all)
> > +{
> > +	struct rb_node *node, *next;
> > +	struct extent_node *en;
> > +	unsigned int count = et->count;
> > +
> > +	node = rb_first(&et->root);
> > +	while (node) {
> > +		next = rb_next(node);
> > +		en = rb_entry(node, struct extent_node, rb_node);
> > +
> > +		if (free_all) {
> > +			spin_lock(&sbi->extent_lock);
> > +			if (!list_empty(&en->list))
> > +				list_del_init(&en->list);
> > +			spin_unlock(&sbi->extent_lock);
> > +		}
> > +
> > +		if (free_all || list_empty(&en->list)) {
> > +			rb_erase(node, &et->root);
> > +			kmem_cache_free(extent_node_slab, en);
> > +			atomic_dec(&sbi->total_ext_node);
> > +			et->count--;
> > +		}
> > +		node = next;
> > +	}
> > +
> > +	return count - et->count;
> > +}
> > +
> > +static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
> > +							struct extent_info *ei)
> > +{
> > +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > +	struct extent_tree *et;
> > +	struct extent_node *en;
> > +
> > +	if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
> > +		return false;
> > +
> > +	down_read(&sbi->extent_tree_lock);
> > +	et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino);
> > +	if (!et) {
> > +		up_read(&sbi->extent_tree_lock);
> > +		return false;
> > +	}
> > +	atomic_inc(&et->refcount);
> > +	up_read(&sbi->extent_tree_lock);
> > +
> > +	read_lock(&et->lock);
> > +	en = __lookup_extent_tree(et, pgofs);
> > +	if (en) {
> > +		*ei = en->ei;
> > +		spin_lock(&sbi->extent_lock);
> > +		if (!list_empty(&en->list))
> > +			list_move_tail(&en->list, &sbi->extent_list);
> > +		spin_unlock(&sbi->extent_lock);
> > +		stat_inc_read_hit(sbi->sb);
> > +	}
> > +	stat_inc_total_hit(sbi->sb);
> > +	read_unlock(&et->lock);
> > +
> > +	atomic_dec(&et->refcount);
> > +	return en ? true : false;
> > +}
> > +
> > +static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs,
> > +							block_t blkaddr)
> > +{
> > +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > +	nid_t ino = inode->i_ino;
> > +	struct extent_tree *et;
> > +	struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
> > +	struct extent_node *den = NULL;
> > +	struct extent_info *pei;
> > +	struct extent_info ei;
> > +	unsigned int endofs;
> > +
> > +	if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
> > +		return;
> > +
> > +retry:
> > +	down_write(&sbi->extent_tree_lock);
> > +	et = radix_tree_lookup(&sbi->extent_tree_root, ino);
> > +	if (!et) {
> > +		et = kmem_cache_alloc(extent_tree_slab, GFP_ATOMIC);
> 
> et = f2fs_kmem_cache_alloc(.., GFP_ATOMIC);

How about modifying as below to avoid holding extent_tree_lock for long time?

et = kmem_cache_alloc(extent_tree_slab, GFP_ATOMIC);
if (!et) {
	up_write(&sbi->extent_tree_lock);
	cond_resched();
	goto retry;
}

> 
> > +		if (!et) {
> > +			up_write(&sbi->extent_tree_lock);
> > +			goto retry;
> > +		}
> > +		if (radix_tree_insert(&sbi->extent_tree_root, ino, et)) {
> > +			up_write(&sbi->extent_tree_lock);
> > +			kmem_cache_free(extent_tree_slab, et);

cond_resched()?

> > +			goto retry;
> > +		}
> > +		memset(et, 0, sizeof(struct extent_tree));
> > +		et->ino = ino;
> > +		et->root = RB_ROOT;
> > +		rwlock_init(&et->lock);
> > +		atomic_set(&et->refcount, 0);
> > +		et->count = 0;
> > +		sbi->total_ext_tree++;
> > +	}
> > +	atomic_inc(&et->refcount);
> > +	up_write(&sbi->extent_tree_lock);
> > +
> > +	write_lock(&et->lock);
> > +
> > +	/* 1. lookup and remove exist extent info in cache */
> 
>                                 existing

got it.

> 
> > +	en = __remove_extent_tree(et, fofs);
> 
> en = __detach_extent_node();?

OK, it seems that most codes of __remove_extent_tree and __lookup_extent_tree 
are the same, so here I want to remove extent node like this:

	en = __lookup_extent_tree(et, fofs);
	if (!en)
		goto update_extent;

	dei = en->ei;
	__detach_extent_node(sbi, et, en);

How do you think?

> 
> > +	if (!en)
> > +		goto update_extent;
> > +
> > +	pei = &en->ei;
> > +	/* 2. if extent can be split more, split and insert the left part */
> > +	if (pei->len > 1) {
> > +		/*  insert left part of split extent into cache */
> > +		if (pei->fofs < fofs) {
> > +			set_extent_info(&ei, pei->fofs, pei->blk,
> > +							fofs - pei->fofs);
> > +			en1 = __insert_extent_tree(sbi, et, &ei, NULL);
> > +		}
> > +
> > +		/* insert right part of split extent into cache */
> > +		endofs = pei->fofs + pei->len - 1;
> > +		if (endofs > fofs) {
> > +			set_extent_info(&ei, fofs + 1,
> > +				fofs - pei->fofs + pei->blk, endofs - fofs);
> > +			en2 = __insert_extent_tree(sbi, et, &ei, NULL);
> > +		}
> > +	}
> > +
> > +update_extent:
> > +	/* 3. update extent in extent cache */
> > +	if (blkaddr) {
> > +		set_extent_info(&ei, fofs, blkaddr, 1);
> > +		en3 = __insert_extent_tree(sbi, et, &ei, &den);
> > +	}
> > +
> > +	/* 4. update in global extent list */
> > +	spin_lock(&sbi->extent_lock);
> > +	if (en && !list_empty(&en->list))
> > +		list_del_init(&en->list);
> > +	/*
> > +	 * en1 and en2 split from en, they will become more and more smaller
> > +	 * fragments after splitting several times. So if the length is smaller
> > +	 * than F2FS_MIN_EXTENT_LEN, we will not add them into extent_list,
> > +	 * but just waiting shrinker to free them for reclaiming when OOM.
> > +	 */
> 
> Can we just remove en1 and en2 in __insert_extent_tree?

en1 and en2 is newly added, in __attach_extent_node we do not add en1 and en2 into
lru list, so if you do not want to keep split part in lru list, let's just remove
the below codes.

> 
> > +	if (en1 && en1->ei.len >= F2FS_MIN_EXTENT_LEN)
> > +		list_add_tail(&en1->list, &sbi->extent_list);
> > +	if (en2 && en2->ei.len >= F2FS_MIN_EXTENT_LEN)
> > +		list_add_tail(&en2->list, &sbi->extent_list);
> > +	if (en3) {
> > +		if (list_empty(&en3->list))
> > +			list_add_tail(&en3->list, &sbi->extent_list);
> > +		else
> > +			list_move_tail(&en3->list, &sbi->extent_list);
> > +	}
> > +	if (den && !list_empty(&den->list))
> > +		list_del_init(&den->list);
> > +	spin_unlock(&sbi->extent_lock);
> > +
> > +	if (en) {
> > +		kmem_cache_free(extent_node_slab, en);
> > +		atomic_dec(&sbi->total_ext_node);
> 
> 		--> move into __detach_extent_node().

will do.

> 
> > +	}
> > +	if (den) {
> > +		kmem_cache_free(extent_node_slab, den);
> > +		atomic_dec(&sbi->total_ext_node);
> > +	}
> > +
> > +	write_unlock(&et->lock);
> > +	atomic_dec(&et->refcount);
> > +}
> > +
> > +void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
> > +{
> > +	struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
> > +	struct extent_node *en, *tmp;
> > +	unsigned long ino = F2FS_ROOT_INO(sbi);
> > +	struct radix_tree_iter iter;
> > +	void **slot;
> > +	unsigned int found;
> > +	unsigned int node_cnt = 0, tree_cnt = 0;
> > +
> > +	if (available_free_memory(sbi, EXTENT_CACHE))
> > +		return;
> > +
> > +	spin_lock(&sbi->extent_lock);
> > +	list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
> > +		if (!nr_shrink--)
> > +			break;
> > +		list_del_init(&en->list);
> > +	}
> > +	spin_unlock(&sbi->extent_lock);
> > +
> > +	down_read(&sbi->extent_tree_lock);
> > +	while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
> > +				(void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
> > +		unsigned i;
> > +
> > +		ino = treevec[found - 1]->ino + 1;
> > +		for (i = 0; i < found; i++) {
> > +			struct extent_tree *et = treevec[i];
> > +
> > +			atomic_inc(&et->refcount);
> > +			write_lock(&et->lock);
> > +			node_cnt += __free_extent_tree(sbi, et, false);
> > +			write_unlock(&et->lock);
> > +			atomic_dec(&et->refcount);
> > +		}
> > +	}
> > +	up_read(&sbi->extent_tree_lock);
> > +
> > +	down_write(&sbi->extent_tree_lock);
> > +	radix_tree_for_each_slot(slot, &sbi->extent_tree_root, &iter,
> > +							F2FS_ROOT_INO(sbi)) {
> > +		struct extent_tree *et = (struct extent_tree *)*slot;
> > +
> > +		if (!atomic_read(&et->refcount) && !et->count) {
> > +			radix_tree_delete(&sbi->extent_tree_root, et->ino);
> > +			kmem_cache_free(extent_tree_slab, et);
> > +			sbi->total_ext_tree--;
> > +			tree_cnt++;
> 
> No use of tree_cnt.

This is used by trace function in RFC PATCH 10/10.

Thanks for your review! :)

Regards,
Yu

> 
> Thanks,

[snip]


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [f2fs-dev][RFC PATCH 06/10] f2fs: add core functions for rb-tree extent cache
  2015-01-23  6:15   ` Chao Yu
@ 2015-01-23 19:43     ` Jaegeuk Kim
  2015-01-26  5:39       ` Chao Yu
  0 siblings, 1 reply; 9+ messages in thread
From: Jaegeuk Kim @ 2015-01-23 19:43 UTC (permalink / raw)
  To: Chao Yu; +Cc: 'Changman Lee', linux-f2fs-devel, linux-kernel

On Fri, Jan 23, 2015 at 02:15:56PM +0800, Chao Yu wrote:
> Hi Jaegeuk,
> 
> > -----Original Message-----
> > From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> > Sent: Friday, January 23, 2015 9:48 AM
> > To: Chao Yu
> > Cc: Changman Lee; linux-f2fs-devel@lists.sourceforge.net; linux-kernel@vger.kernel.org
> > Subject: Re: [f2fs-dev][RFC PATCH 06/10] f2fs: add core functions for rb-tree extent cache
> > 
> > On Mon, Jan 12, 2015 at 03:14:48PM +0800, Chao Yu wrote:
> > > This patch adds core functions including slab cache init function and
> > > init/lookup/update/shrink/destroy function for rb-tree based extent cache.
> > >
> > > Thank Jaegeuk Kim and Changman Lee as they gave much suggestion about detail
> > > design and implementation of extent cache.
> > >
> > > Todo:
> > >  * add a cached_ei into struct extent_tree for a quick recent cache.
> > >  * register rb-based extent cache shrink with mm shrink interface.
> > >  * disable dir inode's extent cache.
> > >

snip

> 
> > 
> > > +	}
> > 
> > How about adding __attach_extent_node()?
> 
> That's more readable.
> 
> How do you think of the following functions:
> 
> struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
> 				struct extent_tree *et, struct extent_info *ei,
> 				struct rb_node *parent, struct rb_node **p)
> {
> 	struct extent_node *en;
> 
> 	en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
> 	if (!en)
> 		return NULL;
> 
> 	en->ei = *ei;
> 	INIT_LIST_HEAD(&en->list);
> 
> 	rb_link_node(&en->rb_node, parent, p);
> 	rb_insert_color(&en->rb_node, &et->root);
> 	et->count++;
> 	atomic_inc(&sbi->total_ext_node);
> 	return en;
> }
> 
> static void __detach_extent_node(struct f2fs_sb_info *sbi,
> 				struct extent_tree *et, struct extent_node *en)
> {
> 	rb_erase(&en->rb_node, &et->root);
> 	et->count--;
> 	atomic_dec(&sbi->total_ext_node);
> }

Looks good to me.

> 
> > {
> > > +
> > > +	en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
> > 
> > 	en = f2fs_kmem_cache_alloc(.., GFP_ATOMIC);
> 
> We should avoid cond_resched() in f2fs_kmem_cache_alloc when we are holding write_lock.
> 
> IMO, it's better to return NULL if we fail to alloc extent_node here.
> Otherwise we'd better alloc extent_node before write_lock.

Oh, it's write_lock. Okay.

> 
> > 
> > > +	en->ei = *ei;
> > > +	INIT_LIST_HEAD(&en->list);
> > > +
> > > +	rb_link_node(&en->rb_node, parent, p);
> > > +	rb_insert_color(&en->rb_node, &et->root);
> > > +	atomic_inc(&sbi->total_ext_node);
> > > +	et->count++;
> > 
> > }
> > 
> > > +
> > > +	return en;
> > > +}
> > > +
> > > +static struct extent_node *__remove_extent_tree(struct extent_tree *et,
> > > +							unsigned int fofs)
> > 
> > This is __detach_extent_node()?
> > 
> > > +{
> > > +	struct rb_node *p = et->root.rb_node;
> > > +	struct extent_node *en;
> > > +
> > > +	while (p) {
> > > +		en = rb_entry(p, struct extent_node, rb_node);
> > > +
> > > +		if (fofs < en->ei.fofs)
> > > +			p = p->rb_left;
> > 
> > Coding style.
> > 
> > if () {
> > } else {
> > }
> 
> will fix.
> 
> > 
> > > +		else if (fofs >= en->ei.fofs + en->ei.len)
> > > +			p = p->rb_right;
> > > +		else {
> > > +			rb_erase(&en->rb_node, &et->root);
> > > +			et->count--;
> > 
> > Add here?
> > 			atomic_dec(&sbi->total_ext_node);
> 
> Agree, see __detach_extent_node() implementation above.
> 
> > 
> > > +			return en;
> > > +		}
> > > +	}
> > > +	return NULL;
> > > +}
> > > +
> > > +static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
> > > +					struct extent_tree *et, bool free_all)
> > > +{
> > > +	struct rb_node *node, *next;
> > > +	struct extent_node *en;
> > > +	unsigned int count = et->count;
> > > +
> > > +	node = rb_first(&et->root);
> > > +	while (node) {
> > > +		next = rb_next(node);
> > > +		en = rb_entry(node, struct extent_node, rb_node);
> > > +
> > > +		if (free_all) {
> > > +			spin_lock(&sbi->extent_lock);
> > > +			if (!list_empty(&en->list))
> > > +				list_del_init(&en->list);
> > > +			spin_unlock(&sbi->extent_lock);
> > > +		}
> > > +
> > > +		if (free_all || list_empty(&en->list)) {
> > > +			rb_erase(node, &et->root);
> > > +			kmem_cache_free(extent_node_slab, en);
> > > +			atomic_dec(&sbi->total_ext_node);
> > > +			et->count--;
> > > +		}
> > > +		node = next;
> > > +	}
> > > +
> > > +	return count - et->count;
> > > +}
> > > +
> > > +static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
> > > +							struct extent_info *ei)
> > > +{
> > > +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > > +	struct extent_tree *et;
> > > +	struct extent_node *en;
> > > +
> > > +	if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
> > > +		return false;
> > > +
> > > +	down_read(&sbi->extent_tree_lock);
> > > +	et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino);
> > > +	if (!et) {
> > > +		up_read(&sbi->extent_tree_lock);
> > > +		return false;
> > > +	}
> > > +	atomic_inc(&et->refcount);
> > > +	up_read(&sbi->extent_tree_lock);
> > > +
> > > +	read_lock(&et->lock);
> > > +	en = __lookup_extent_tree(et, pgofs);
> > > +	if (en) {
> > > +		*ei = en->ei;
> > > +		spin_lock(&sbi->extent_lock);
> > > +		if (!list_empty(&en->list))
> > > +			list_move_tail(&en->list, &sbi->extent_list);
> > > +		spin_unlock(&sbi->extent_lock);
> > > +		stat_inc_read_hit(sbi->sb);
> > > +	}
> > > +	stat_inc_total_hit(sbi->sb);
> > > +	read_unlock(&et->lock);
> > > +
> > > +	atomic_dec(&et->refcount);
> > > +	return en ? true : false;
> > > +}
> > > +
> > > +static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs,
> > > +							block_t blkaddr)
> > > +{
> > > +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > > +	nid_t ino = inode->i_ino;
> > > +	struct extent_tree *et;
> > > +	struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
> > > +	struct extent_node *den = NULL;
> > > +	struct extent_info *pei;
> > > +	struct extent_info ei;
> > > +	unsigned int endofs;
> > > +
> > > +	if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
> > > +		return;
> > > +
> > > +retry:
> > > +	down_write(&sbi->extent_tree_lock);
> > > +	et = radix_tree_lookup(&sbi->extent_tree_root, ino);
> > > +	if (!et) {
> > > +		et = kmem_cache_alloc(extent_tree_slab, GFP_ATOMIC);
> > 
> > et = f2fs_kmem_cache_alloc(.., GFP_ATOMIC);
> 
> How about modifying as below to avoid holding extent_tree_lock for long time?

Hmm. I don't think it doesn't take such the long time.
It's GFP_ATOMIC.

Moreover, for radix_tree, I prefer to use f2fs_radix_tree_insert with GFP_NOIO.
Since, we actually don't need to call kmem_cache_free.

> 
> et = kmem_cache_alloc(extent_tree_slab, GFP_ATOMIC);
> if (!et) {
> 	up_write(&sbi->extent_tree_lock);
> 	cond_resched();
> 	goto retry;
> }
> 
> > 
> > > +		if (!et) {
> > > +			up_write(&sbi->extent_tree_lock);
> > > +			goto retry;
> > > +		}
> > > +		if (radix_tree_insert(&sbi->extent_tree_root, ino, et)) {
> > > +			up_write(&sbi->extent_tree_lock);
> > > +			kmem_cache_free(extent_tree_slab, et);
> 
> cond_resched()?

I'm not sure why this should be needed.
There is rw_semaphore, so we have already a rescheduling point.

> 
> > > +			goto retry;
> > > +		}
> > > +		memset(et, 0, sizeof(struct extent_tree));
> > > +		et->ino = ino;
> > > +		et->root = RB_ROOT;
> > > +		rwlock_init(&et->lock);
> > > +		atomic_set(&et->refcount, 0);
> > > +		et->count = 0;
> > > +		sbi->total_ext_tree++;
> > > +	}
> > > +	atomic_inc(&et->refcount);
> > > +	up_write(&sbi->extent_tree_lock);
> > > +
> > > +	write_lock(&et->lock);
> > > +
> > > +	/* 1. lookup and remove exist extent info in cache */
> > 
> >                                 existing
> 
> got it.
> 
> > 
> > > +	en = __remove_extent_tree(et, fofs);
> > 
> > en = __detach_extent_node();?
> 
> OK, it seems that most codes of __remove_extent_tree and __lookup_extent_tree 
> are the same, so here I want to remove extent node like this:
> 
> 	en = __lookup_extent_tree(et, fofs);
> 	if (!en)
> 		goto update_extent;
> 
> 	dei = en->ei;
> 	__detach_extent_node(sbi, et, en);
> 
> How do you think?

Looks good to me.

> 
> > 
> > > +	if (!en)
> > > +		goto update_extent;
> > > +
> > > +	pei = &en->ei;
> > > +	/* 2. if extent can be split more, split and insert the left part */
> > > +	if (pei->len > 1) {
> > > +		/*  insert left part of split extent into cache */
> > > +		if (pei->fofs < fofs) {
> > > +			set_extent_info(&ei, pei->fofs, pei->blk,
> > > +							fofs - pei->fofs);
> > > +			en1 = __insert_extent_tree(sbi, et, &ei, NULL);
> > > +		}
> > > +
> > > +		/* insert right part of split extent into cache */
> > > +		endofs = pei->fofs + pei->len - 1;
> > > +		if (endofs > fofs) {
> > > +			set_extent_info(&ei, fofs + 1,
> > > +				fofs - pei->fofs + pei->blk, endofs - fofs);
> > > +			en2 = __insert_extent_tree(sbi, et, &ei, NULL);
> > > +		}
> > > +	}
> > > +
> > > +update_extent:
> > > +	/* 3. update extent in extent cache */
> > > +	if (blkaddr) {
> > > +		set_extent_info(&ei, fofs, blkaddr, 1);
> > > +		en3 = __insert_extent_tree(sbi, et, &ei, &den);
> > > +	}
> > > +
> > > +	/* 4. update in global extent list */
> > > +	spin_lock(&sbi->extent_lock);
> > > +	if (en && !list_empty(&en->list))
> > > +		list_del_init(&en->list);
> > > +	/*
> > > +	 * en1 and en2 split from en, they will become more and more smaller
> > > +	 * fragments after splitting several times. So if the length is smaller
> > > +	 * than F2FS_MIN_EXTENT_LEN, we will not add them into extent_list,
> > > +	 * but just waiting shrinker to free them for reclaiming when OOM.
> > > +	 */
> > 
> > Can we just remove en1 and en2 in __insert_extent_tree?
> 
> en1 and en2 is newly added, in __attach_extent_node we do not add en1 and en2 into
> lru list, so if you do not want to keep split part in lru list, let's just remove
> the below codes.

What I meant was, how about avoiding attaching en1 and en2, if they are splits whose
lens are less than F2FS_MIN_EXTENT_LEN in advance?

> 
> > 
> > > +	if (en1 && en1->ei.len >= F2FS_MIN_EXTENT_LEN)
> > > +		list_add_tail(&en1->list, &sbi->extent_list);
> > > +	if (en2 && en2->ei.len >= F2FS_MIN_EXTENT_LEN)
> > > +		list_add_tail(&en2->list, &sbi->extent_list);
> > > +	if (en3) {
> > > +		if (list_empty(&en3->list))
> > > +			list_add_tail(&en3->list, &sbi->extent_list);
> > > +		else
> > > +			list_move_tail(&en3->list, &sbi->extent_list);
> > > +	}
> > > +	if (den && !list_empty(&den->list))
> > > +		list_del_init(&den->list);
> > > +	spin_unlock(&sbi->extent_lock);
> > > +
> > > +	if (en) {
> > > +		kmem_cache_free(extent_node_slab, en);
> > > +		atomic_dec(&sbi->total_ext_node);
> > 
> > 		--> move into __detach_extent_node().
> 
> will do.
> 
> > 
> > > +	}
> > > +	if (den) {
> > > +		kmem_cache_free(extent_node_slab, den);
> > > +		atomic_dec(&sbi->total_ext_node);
> > > +	}
> > > +
> > > +	write_unlock(&et->lock);
> > > +	atomic_dec(&et->refcount);
> > > +}
> > > +
> > > +void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
> > > +{
> > > +	struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
> > > +	struct extent_node *en, *tmp;
> > > +	unsigned long ino = F2FS_ROOT_INO(sbi);
> > > +	struct radix_tree_iter iter;
> > > +	void **slot;
> > > +	unsigned int found;
> > > +	unsigned int node_cnt = 0, tree_cnt = 0;
> > > +
> > > +	if (available_free_memory(sbi, EXTENT_CACHE))
> > > +		return;
> > > +
> > > +	spin_lock(&sbi->extent_lock);
> > > +	list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
> > > +		if (!nr_shrink--)
> > > +			break;
> > > +		list_del_init(&en->list);
> > > +	}
> > > +	spin_unlock(&sbi->extent_lock);
> > > +
> > > +	down_read(&sbi->extent_tree_lock);
> > > +	while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
> > > +				(void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
> > > +		unsigned i;
> > > +
> > > +		ino = treevec[found - 1]->ino + 1;
> > > +		for (i = 0; i < found; i++) {
> > > +			struct extent_tree *et = treevec[i];
> > > +
> > > +			atomic_inc(&et->refcount);
> > > +			write_lock(&et->lock);
> > > +			node_cnt += __free_extent_tree(sbi, et, false);
> > > +			write_unlock(&et->lock);
> > > +			atomic_dec(&et->refcount);
> > > +		}
> > > +	}
> > > +	up_read(&sbi->extent_tree_lock);
> > > +
> > > +	down_write(&sbi->extent_tree_lock);
> > > +	radix_tree_for_each_slot(slot, &sbi->extent_tree_root, &iter,
> > > +							F2FS_ROOT_INO(sbi)) {
> > > +		struct extent_tree *et = (struct extent_tree *)*slot;
> > > +
> > > +		if (!atomic_read(&et->refcount) && !et->count) {
> > > +			radix_tree_delete(&sbi->extent_tree_root, et->ino);
> > > +			kmem_cache_free(extent_tree_slab, et);
> > > +			sbi->total_ext_tree--;
> > > +			tree_cnt++;
> > 
> > No use of tree_cnt.
> 
> This is used by trace function in RFC PATCH 10/10.

Well, then, it needs to add tree_cnt in Patch #10. :)

Thanks,

> 
> Thanks for your review! :)
> 
> Regards,
> Yu
> 
> > 
> > Thanks,
> 
> [snip]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [f2fs-dev][RFC PATCH 06/10] f2fs: add core functions for rb-tree extent cache
  2015-01-23 19:43     ` Jaegeuk Kim
@ 2015-01-26  5:39       ` Chao Yu
  0 siblings, 0 replies; 9+ messages in thread
From: Chao Yu @ 2015-01-26  5:39 UTC (permalink / raw)
  To: 'Jaegeuk Kim'
  Cc: 'Changman Lee', linux-f2fs-devel, linux-kernel

Hi Jaegeuk,

> -----Original Message-----
> From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> Sent: Saturday, January 24, 2015 3:43 AM
> To: Chao Yu
> Cc: 'Changman Lee'; linux-f2fs-devel@lists.sourceforge.net; linux-kernel@vger.kernel.org
> Subject: Re: [f2fs-dev][RFC PATCH 06/10] f2fs: add core functions for rb-tree extent cache
> 
> On Fri, Jan 23, 2015 at 02:15:56PM +0800, Chao Yu wrote:
> > Hi Jaegeuk,
> >
> > > -----Original Message-----
> > > From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> > > Sent: Friday, January 23, 2015 9:48 AM
> > > To: Chao Yu
> > > Cc: Changman Lee; linux-f2fs-devel@lists.sourceforge.net; linux-kernel@vger.kernel.org
> > > Subject: Re: [f2fs-dev][RFC PATCH 06/10] f2fs: add core functions for rb-tree extent cache
> > >
> > > On Mon, Jan 12, 2015 at 03:14:48PM +0800, Chao Yu wrote:
> > > > This patch adds core functions including slab cache init function and
> > > > init/lookup/update/shrink/destroy function for rb-tree based extent cache.
> > > >
> > > > Thank Jaegeuk Kim and Changman Lee as they gave much suggestion about detail
> > > > design and implementation of extent cache.
> > > >
> > > > Todo:
> > > >  * add a cached_ei into struct extent_tree for a quick recent cache.
> > > >  * register rb-based extent cache shrink with mm shrink interface.
> > > >  * disable dir inode's extent cache.
> > > >
> 

[snip]

> > > > +static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs,
> > > > +							block_t blkaddr)
> > > > +{
> > > > +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > > > +	nid_t ino = inode->i_ino;
> > > > +	struct extent_tree *et;
> > > > +	struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
> > > > +	struct extent_node *den = NULL;
> > > > +	struct extent_info *pei;
> > > > +	struct extent_info ei;
> > > > +	unsigned int endofs;
> > > > +
> > > > +	if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
> > > > +		return;
> > > > +
> > > > +retry:
> > > > +	down_write(&sbi->extent_tree_lock);
> > > > +	et = radix_tree_lookup(&sbi->extent_tree_root, ino);
> > > > +	if (!et) {
> > > > +		et = kmem_cache_alloc(extent_tree_slab, GFP_ATOMIC);
> > >
> > > et = f2fs_kmem_cache_alloc(.., GFP_ATOMIC);
> >
> > How about modifying as below to avoid holding extent_tree_lock for long time?
> 
> Hmm. I don't think it doesn't take such the long time.
> It's GFP_ATOMIC.
> 
> Moreover, for radix_tree, I prefer to use f2fs_radix_tree_insert with GFP_NOIO.
> Since, we actually don't need to call kmem_cache_free.

How about use following code?

	f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
	f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);

in init_extent_cache_info()
	INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);

> 
> >
> > et = kmem_cache_alloc(extent_tree_slab, GFP_ATOMIC);
> > if (!et) {
> > 	up_write(&sbi->extent_tree_lock);
> > 	cond_resched();
> > 	goto retry;
> > }
> >
> > >
> > > > +		if (!et) {
> > > > +			up_write(&sbi->extent_tree_lock);
> > > > +			goto retry;
> > > > +		}
> > > > +		if (radix_tree_insert(&sbi->extent_tree_root, ino, et)) {
> > > > +			up_write(&sbi->extent_tree_lock);
> > > > +			kmem_cache_free(extent_tree_slab, et);
> >
> > cond_resched()?
> 
> I'm not sure why this should be needed.
> There is rw_semaphore, so we have already a rescheduling point.
> 

[snip]

> > > Can we just remove en1 and en2 in __insert_extent_tree?
> >
> > en1 and en2 is newly added, in __attach_extent_node we do not add en1 and en2 into
> > lru list, so if you do not want to keep split part in lru list, let's just remove
> > the below codes.
> 
> What I meant was, how about avoiding attaching en1 and en2, if they are splits whose
> lens are less than F2FS_MIN_EXTENT_LEN in advance?

Oh, it's better, I will do it.

[snip]

> > > No use of tree_cnt.
> >
> > This is used by trace function in RFC PATCH 10/10.
> 
> Well, then, it needs to add tree_cnt in Patch #10. :)

OK, let's move it. :)

Thanks,

> 
> Thanks,
> 
> >
> > Thanks for your review! :)
> >
> > Regards,
> > Yu
> >
> > >
> > > Thanks,
> >
> > [snip]


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2015-01-26  5:40 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-01-12  7:14 [f2fs-dev][RFC PATCH 06/10] f2fs: add core functions for rb-tree extent cache Chao Yu
2015-01-20 15:06 ` Changman Lee
2015-01-21  8:41   ` Chao Yu
2015-01-21 22:25     ` Changman Lee
2015-01-22  1:32       ` Chao Yu
2015-01-23  1:48 ` Jaegeuk Kim
2015-01-23  6:15   ` Chao Yu
2015-01-23 19:43     ` Jaegeuk Kim
2015-01-26  5:39       ` Chao Yu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).