LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH 01/10] staging: lustre: ldlm: store name directly in namespace.
  2018-05-01  3:52 [PATCH 00/10] staging: lustre: assorted improvements NeilBrown
  2018-05-01  3:52 ` [PATCH 02/10] staging: lustre: make struct lu_site_bkt_data private NeilBrown
@ 2018-05-01  3:52 ` NeilBrown
  2018-05-01  4:04   ` Dilger, Andreas
  2018-05-02 18:11   ` James Simmons
  2018-05-01  3:52 ` [PATCH 03/10] staging: lustre: lu_object: discard extra lru count NeilBrown
                   ` (7 subsequent siblings)
  9 siblings, 2 replies; 28+ messages in thread
From: NeilBrown @ 2018-05-01  3:52 UTC (permalink / raw)
  To: Oleg Drokin, Greg Kroah-Hartman, James Simmons, Andreas Dilger
  Cc: Linux Kernel Mailing List, Lustre Development List

Rather than storing the name of a namespace in the
hash table, store it directly in the namespace.
This will allow the hashtable to be changed to use
rhashtable.

Signed-off-by: NeilBrown <neilb@suse.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Andreas Dilger <andreas.dilger@dilger.ca>
---
 drivers/staging/lustre/lustre/include/lustre_dlm.h |    5 ++++-
 drivers/staging/lustre/lustre/ldlm/ldlm_resource.c |    5 +++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm.h b/drivers/staging/lustre/lustre/include/lustre_dlm.h
index d668d86423a4..b3532adac31c 100644
--- a/drivers/staging/lustre/lustre/include/lustre_dlm.h
+++ b/drivers/staging/lustre/lustre/include/lustre_dlm.h
@@ -362,6 +362,9 @@ struct ldlm_namespace {
 	/** Flag indicating if namespace is on client instead of server */
 	enum ldlm_side		ns_client;
 
+	/** name of this namespace */
+	char			*ns_name;
+
 	/** Resource hash table for namespace. */
 	struct cfs_hash		*ns_rs_hash;
 
@@ -878,7 +881,7 @@ static inline bool ldlm_has_layout(struct ldlm_lock *lock)
 static inline char *
 ldlm_ns_name(struct ldlm_namespace *ns)
 {
-	return ns->ns_rs_hash->hs_name;
+	return ns->ns_name;
 }
 
 static inline struct ldlm_namespace *
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
index 6c615b6e9bdc..43bbc5fd94cc 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
@@ -688,6 +688,9 @@ struct ldlm_namespace *ldlm_namespace_new(struct obd_device *obd, char *name,
 	ns->ns_obd      = obd;
 	ns->ns_appetite = apt;
 	ns->ns_client   = client;
+	ns->ns_name     = kstrdup(name, GFP_KERNEL);
+	if (!ns->ns_name)
+		goto out_hash;
 
 	INIT_LIST_HEAD(&ns->ns_list_chain);
 	INIT_LIST_HEAD(&ns->ns_unused_list);
@@ -730,6 +733,7 @@ struct ldlm_namespace *ldlm_namespace_new(struct obd_device *obd, char *name,
 	ldlm_namespace_sysfs_unregister(ns);
 	ldlm_namespace_cleanup(ns, 0);
 out_hash:
+	kfree(ns->ns_name);
 	cfs_hash_putref(ns->ns_rs_hash);
 out_ns:
 	kfree(ns);
@@ -993,6 +997,7 @@ void ldlm_namespace_free_post(struct ldlm_namespace *ns)
 	ldlm_namespace_debugfs_unregister(ns);
 	ldlm_namespace_sysfs_unregister(ns);
 	cfs_hash_putref(ns->ns_rs_hash);
+	kfree(ns->ns_name);
 	/* Namespace \a ns should be not on list at this time, otherwise
 	 * this will cause issues related to using freed \a ns in poold
 	 * thread.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 03/10] staging: lustre: lu_object: discard extra lru count.
  2018-05-01  3:52 [PATCH 00/10] staging: lustre: assorted improvements NeilBrown
  2018-05-01  3:52 ` [PATCH 02/10] staging: lustre: make struct lu_site_bkt_data private NeilBrown
  2018-05-01  3:52 ` [PATCH 01/10] staging: lustre: ldlm: store name directly in namespace NeilBrown
@ 2018-05-01  3:52 ` NeilBrown
  2018-05-01  4:19   ` Dilger, Andreas
  2018-05-01  3:52 ` [PATCH 07/10] staging: lustre: llite: remove redundant lookup in dump_pgcache NeilBrown
                   ` (6 subsequent siblings)
  9 siblings, 1 reply; 28+ messages in thread
From: NeilBrown @ 2018-05-01  3:52 UTC (permalink / raw)
  To: Oleg Drokin, Greg Kroah-Hartman, James Simmons, Andreas Dilger
  Cc: Linux Kernel Mailing List, Lustre Development List

lu_object maintains 2 lru counts.
One is a per-bucket lsb_lru_len.
The other is the per-cpu ls_lru_len_counter.

The only times the per-bucket counters are used are:
- a debug message when an object is added
- in lu_site_stats_get when all the counters are combined.

The debug message is not essential, and the per-cpu counter
can be used to get the combined total.

So discard the per-bucket lsb_lru_len.

Signed-off-by: NeilBrown <neilb@suse.com>
Reviewed-by: Andreas Dilger <andreas.dilger@dilger.ca>
---
 drivers/staging/lustre/lustre/obdclass/lu_object.c |   24 ++++++++------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index 2a8a25d6edb5..2bf089817157 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -57,10 +57,6 @@
 #include <linux/list.h>
 
 struct lu_site_bkt_data {
-	/**
-	 * number of object in this bucket on the lsb_lru list.
-	 */
-	long			lsb_lru_len;
 	/**
 	 * LRU list, updated on each access to object. Protected by
 	 * bucket lock of lu_site::ls_obj_hash.
@@ -187,10 +183,9 @@ void lu_object_put(const struct lu_env *env, struct lu_object *o)
 	if (!lu_object_is_dying(top)) {
 		LASSERT(list_empty(&top->loh_lru));
 		list_add_tail(&top->loh_lru, &bkt->lsb_lru);
-		bkt->lsb_lru_len++;
 		percpu_counter_inc(&site->ls_lru_len_counter);
-		CDEBUG(D_INODE, "Add %p to site lru. hash: %p, bkt: %p, lru_len: %ld\n",
-		       o, site->ls_obj_hash, bkt, bkt->lsb_lru_len);
+		CDEBUG(D_INODE, "Add %p to site lru. hash: %p, bkt: %p\n",
+		       o, site->ls_obj_hash, bkt);
 		cfs_hash_bd_unlock(site->ls_obj_hash, &bd, 1);
 		return;
 	}
@@ -238,7 +233,6 @@ void lu_object_unhash(const struct lu_env *env, struct lu_object *o)
 
 			list_del_init(&top->loh_lru);
 			bkt = cfs_hash_bd_extra_get(obj_hash, &bd);
-			bkt->lsb_lru_len--;
 			percpu_counter_dec(&site->ls_lru_len_counter);
 		}
 		cfs_hash_bd_del_locked(obj_hash, &bd, &top->loh_hash);
@@ -422,7 +416,6 @@ int lu_site_purge_objects(const struct lu_env *env, struct lu_site *s,
 			cfs_hash_bd_del_locked(s->ls_obj_hash,
 					       &bd2, &h->loh_hash);
 			list_move(&h->loh_lru, &dispose);
-			bkt->lsb_lru_len--;
 			percpu_counter_dec(&s->ls_lru_len_counter);
 			if (did_sth == 0)
 				did_sth = 1;
@@ -621,7 +614,6 @@ static struct lu_object *htable_lookup(struct lu_site *s,
 		lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_HIT);
 		if (!list_empty(&h->loh_lru)) {
 			list_del_init(&h->loh_lru);
-			bkt->lsb_lru_len--;
 			percpu_counter_dec(&s->ls_lru_len_counter);
 		}
 		return lu_object_top(h);
@@ -1834,19 +1826,21 @@ struct lu_site_stats {
 	unsigned int	lss_busy;
 };
 
-static void lu_site_stats_get(struct cfs_hash *hs,
+static void lu_site_stats_get(const struct lu_site *s,
 			      struct lu_site_stats *stats, int populated)
 {
+	struct cfs_hash *hs = s->ls_obj_hash;
 	struct cfs_hash_bd bd;
 	unsigned int i;
+	/* percpu_counter_read_positive() won't accept a const pointer */
+	struct lu_site *s2 = (struct lu_site *)s;
 
+	stats->lss_busy += cfs_hash_size_get(hs) -
+		percpu_counter_read_positive(&s2->ls_lru_len_counter);
 	cfs_hash_for_each_bucket(hs, &bd, i) {
-		struct lu_site_bkt_data *bkt = cfs_hash_bd_extra_get(hs, &bd);
 		struct hlist_head	*hhead;
 
 		cfs_hash_bd_lock(hs, &bd, 1);
-		stats->lss_busy  +=
-			cfs_hash_bd_count_get(&bd) - bkt->lsb_lru_len;
 		stats->lss_total += cfs_hash_bd_count_get(&bd);
 		stats->lss_max_search = max((int)stats->lss_max_search,
 					    cfs_hash_bd_depmax_get(&bd));
@@ -2039,7 +2033,7 @@ int lu_site_stats_print(const struct lu_site *s, struct seq_file *m)
 	struct lu_site_stats stats;
 
 	memset(&stats, 0, sizeof(stats));
-	lu_site_stats_get(s->ls_obj_hash, &stats, 1);
+	lu_site_stats_get(s, &stats, 1);
 
 	seq_printf(m, "%d/%d %d/%ld %d %d %d %d %d %d %d\n",
 		   stats.lss_busy,

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 02/10] staging: lustre: make struct lu_site_bkt_data private
  2018-05-01  3:52 [PATCH 00/10] staging: lustre: assorted improvements NeilBrown
@ 2018-05-01  3:52 ` NeilBrown
  2018-05-01  4:10   ` [lustre-devel] " Dilger, Andreas
  2018-05-02  3:02   ` James Simmons
  2018-05-01  3:52 ` [PATCH 01/10] staging: lustre: ldlm: store name directly in namespace NeilBrown
                   ` (8 subsequent siblings)
  9 siblings, 2 replies; 28+ messages in thread
From: NeilBrown @ 2018-05-01  3:52 UTC (permalink / raw)
  To: Oleg Drokin, Greg Kroah-Hartman, James Simmons, Andreas Dilger
  Cc: Linux Kernel Mailing List, Lustre Development List

This data structure only needs to be public so that
various modules can access a wait queue to wait for object
destruction.
If we provide a function to get the wait queue, rather than the
whole bucket, the structure can be made private.

Signed-off-by: NeilBrown <neilb@suse.com>
---
 drivers/staging/lustre/lustre/include/lu_object.h  |   36 +-------------
 drivers/staging/lustre/lustre/llite/lcommon_cl.c   |    8 ++-
 drivers/staging/lustre/lustre/lov/lov_object.c     |    8 ++-
 drivers/staging/lustre/lustre/obdclass/lu_object.c |   50 +++++++++++++++++---
 4 files changed, 54 insertions(+), 48 deletions(-)

diff --git a/drivers/staging/lustre/lustre/include/lu_object.h b/drivers/staging/lustre/lustre/include/lu_object.h
index c3b0ed518819..f29bbca5af65 100644
--- a/drivers/staging/lustre/lustre/include/lu_object.h
+++ b/drivers/staging/lustre/lustre/include/lu_object.h
@@ -549,31 +549,7 @@ struct lu_object_header {
 };
 
 struct fld;
-
-struct lu_site_bkt_data {
-	/**
-	 * number of object in this bucket on the lsb_lru list.
-	 */
-	long			lsb_lru_len;
-	/**
-	 * LRU list, updated on each access to object. Protected by
-	 * bucket lock of lu_site::ls_obj_hash.
-	 *
-	 * "Cold" end of LRU is lu_site::ls_lru.next. Accessed object are
-	 * moved to the lu_site::ls_lru.prev (this is due to the non-existence
-	 * of list_for_each_entry_safe_reverse()).
-	 */
-	struct list_head		lsb_lru;
-	/**
-	 * Wait-queue signaled when an object in this site is ultimately
-	 * destroyed (lu_object_free()). It is used by lu_object_find() to
-	 * wait before re-trying when object in the process of destruction is
-	 * found in the hash table.
-	 *
-	 * \see htable_lookup().
-	 */
-	wait_queue_head_t	       lsb_marche_funebre;
-};
+struct lu_site_bkt_data;
 
 enum {
 	LU_SS_CREATED	 = 0,
@@ -642,14 +618,8 @@ struct lu_site {
 	struct percpu_counter	 ls_lru_len_counter;
 };
 
-static inline struct lu_site_bkt_data *
-lu_site_bkt_from_fid(struct lu_site *site, struct lu_fid *fid)
-{
-	struct cfs_hash_bd bd;
-
-	cfs_hash_bd_get(site->ls_obj_hash, fid, &bd);
-	return cfs_hash_bd_extra_get(site->ls_obj_hash, &bd);
-}
+wait_queue_head_t *
+lu_site_wq_from_fid(struct lu_site *site, struct lu_fid *fid);
 
 static inline struct seq_server_site *lu_site2seq(const struct lu_site *s)
 {
diff --git a/drivers/staging/lustre/lustre/llite/lcommon_cl.c b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
index df5c0c0ae703..d5b42fb1d601 100644
--- a/drivers/staging/lustre/lustre/llite/lcommon_cl.c
+++ b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
@@ -211,12 +211,12 @@ static void cl_object_put_last(struct lu_env *env, struct cl_object *obj)
 
 	if (unlikely(atomic_read(&header->loh_ref) != 1)) {
 		struct lu_site *site = obj->co_lu.lo_dev->ld_site;
-		struct lu_site_bkt_data *bkt;
+		wait_queue_head_t *wq;
 
-		bkt = lu_site_bkt_from_fid(site, &header->loh_fid);
+		wq = lu_site_wq_from_fid(site, &header->loh_fid);
 
 		init_waitqueue_entry(&waiter, current);
-		add_wait_queue(&bkt->lsb_marche_funebre, &waiter);
+		add_wait_queue(wq, &waiter);
 
 		while (1) {
 			set_current_state(TASK_UNINTERRUPTIBLE);
@@ -226,7 +226,7 @@ static void cl_object_put_last(struct lu_env *env, struct cl_object *obj)
 		}
 
 		set_current_state(TASK_RUNNING);
-		remove_wait_queue(&bkt->lsb_marche_funebre, &waiter);
+		remove_wait_queue(wq, &waiter);
 	}
 
 	cl_object_put(env, obj);
diff --git a/drivers/staging/lustre/lustre/lov/lov_object.c b/drivers/staging/lustre/lustre/lov/lov_object.c
index f7c69680cb7d..adc90f310fd7 100644
--- a/drivers/staging/lustre/lustre/lov/lov_object.c
+++ b/drivers/staging/lustre/lustre/lov/lov_object.c
@@ -370,7 +370,7 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
 	struct cl_object	*sub;
 	struct lov_layout_raid0 *r0;
 	struct lu_site	  *site;
-	struct lu_site_bkt_data *bkt;
+	wait_queue_head_t *wq;
 	wait_queue_entry_t	  *waiter;
 
 	r0  = &lov->u.raid0;
@@ -378,7 +378,7 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
 
 	sub  = lovsub2cl(los);
 	site = sub->co_lu.lo_dev->ld_site;
-	bkt  = lu_site_bkt_from_fid(site, &sub->co_lu.lo_header->loh_fid);
+	wq   = lu_site_wq_from_fid(site, &sub->co_lu.lo_header->loh_fid);
 
 	cl_object_kill(env, sub);
 	/* release a reference to the sub-object and ... */
@@ -391,7 +391,7 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
 	if (r0->lo_sub[idx] == los) {
 		waiter = &lov_env_info(env)->lti_waiter;
 		init_waitqueue_entry(waiter, current);
-		add_wait_queue(&bkt->lsb_marche_funebre, waiter);
+		add_wait_queue(wq, waiter);
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		while (1) {
 			/* this wait-queue is signaled at the end of
@@ -408,7 +408,7 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
 				break;
 			}
 		}
-		remove_wait_queue(&bkt->lsb_marche_funebre, waiter);
+		remove_wait_queue(wq, waiter);
 	}
 	LASSERT(!r0->lo_sub[idx]);
 }
diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index 3de7dc0497c4..2a8a25d6edb5 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -56,6 +56,31 @@
 #include <lu_ref.h>
 #include <linux/list.h>
 
+struct lu_site_bkt_data {
+	/**
+	 * number of object in this bucket on the lsb_lru list.
+	 */
+	long			lsb_lru_len;
+	/**
+	 * LRU list, updated on each access to object. Protected by
+	 * bucket lock of lu_site::ls_obj_hash.
+	 *
+	 * "Cold" end of LRU is lu_site::ls_lru.next. Accessed object are
+	 * moved to the lu_site::ls_lru.prev (this is due to the non-existence
+	 * of list_for_each_entry_safe_reverse()).
+	 */
+	struct list_head		lsb_lru;
+	/**
+	 * Wait-queue signaled when an object in this site is ultimately
+	 * destroyed (lu_object_free()). It is used by lu_object_find() to
+	 * wait before re-trying when object in the process of destruction is
+	 * found in the hash table.
+	 *
+	 * \see htable_lookup().
+	 */
+	wait_queue_head_t	       lsb_marche_funebre;
+};
+
 enum {
 	LU_CACHE_PERCENT_MAX	 = 50,
 	LU_CACHE_PERCENT_DEFAULT = 20
@@ -88,6 +113,17 @@ MODULE_PARM_DESC(lu_cache_nr, "Maximum number of objects in lu_object cache");
 static void lu_object_free(const struct lu_env *env, struct lu_object *o);
 static __u32 ls_stats_read(struct lprocfs_stats *stats, int idx);
 
+wait_queue_head_t *
+lu_site_wq_from_fid(struct lu_site *site, struct lu_fid *fid)
+{
+	struct cfs_hash_bd bd;
+	struct lu_site_bkt_data *bkt;
+
+	cfs_hash_bd_get(site->ls_obj_hash, fid, &bd);
+	bkt = cfs_hash_bd_extra_get(site->ls_obj_hash, &bd);
+	return &bkt->lsb_marche_funebre;
+}
+
 /**
  * Decrease reference counter on object. If last reference is freed, return
  * object to the cache, unless lu_object_is_dying(o) holds. In the latter
@@ -288,7 +324,7 @@ static struct lu_object *lu_object_alloc(const struct lu_env *env,
  */
 static void lu_object_free(const struct lu_env *env, struct lu_object *o)
 {
-	struct lu_site_bkt_data *bkt;
+	wait_queue_head_t *wq;
 	struct lu_site	  *site;
 	struct lu_object	*scan;
 	struct list_head	      *layers;
@@ -296,7 +332,7 @@ static void lu_object_free(const struct lu_env *env, struct lu_object *o)
 
 	site   = o->lo_dev->ld_site;
 	layers = &o->lo_header->loh_layers;
-	bkt    = lu_site_bkt_from_fid(site, &o->lo_header->loh_fid);
+	wq     = lu_site_wq_from_fid(site, &o->lo_header->loh_fid);
 	/*
 	 * First call ->loo_object_delete() method to release all resources.
 	 */
@@ -324,8 +360,8 @@ static void lu_object_free(const struct lu_env *env, struct lu_object *o)
 		o->lo_ops->loo_object_free(env, o);
 	}
 
-	if (waitqueue_active(&bkt->lsb_marche_funebre))
-		wake_up_all(&bkt->lsb_marche_funebre);
+	if (waitqueue_active(wq))
+		wake_up_all(wq);
 }
 
 /**
@@ -749,7 +785,7 @@ struct lu_object *lu_object_find_at(const struct lu_env *env,
 				    const struct lu_fid *f,
 				    const struct lu_object_conf *conf)
 {
-	struct lu_site_bkt_data *bkt;
+	wait_queue_head_t	*wq;
 	struct lu_object	*obj;
 	wait_queue_entry_t	   wait;
 
@@ -762,8 +798,8 @@ struct lu_object *lu_object_find_at(const struct lu_env *env,
 		 * wait queue.
 		 */
 		schedule();
-		bkt = lu_site_bkt_from_fid(dev->ld_site, (void *)f);
-		remove_wait_queue(&bkt->lsb_marche_funebre, &wait);
+		wq = lu_site_wq_from_fid(dev->ld_site, (void *)f);
+		remove_wait_queue(wq, &wait);
 	}
 }
 EXPORT_SYMBOL(lu_object_find_at);

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 00/10] staging: lustre: assorted improvements.
@ 2018-05-01  3:52 NeilBrown
  2018-05-01  3:52 ` [PATCH 02/10] staging: lustre: make struct lu_site_bkt_data private NeilBrown
                   ` (9 more replies)
  0 siblings, 10 replies; 28+ messages in thread
From: NeilBrown @ 2018-05-01  3:52 UTC (permalink / raw)
  To: Oleg Drokin, Greg Kroah-Hartman, James Simmons, Andreas Dilger
  Cc: Linux Kernel Mailing List, Lustre Development List

First 6 patches are clean-up patches that I pulled out
of my rhashtable series.  I think these stand alone as
good cleanups, and having them upstream makes the rhashtable
series shorter to ease further review.

Second 2 are revised versions of patches I sent previously that
had conflicts with other patches that landed first.

Last is a bugfix for an issue James mentioned a while back.

Thanks,
NeilBrown


---

NeilBrown (10):
      staging: lustre: ldlm: store name directly in namespace.
      staging: lustre: make struct lu_site_bkt_data private
      staging: lustre: lu_object: discard extra lru count.
      staging: lustre: lu_object: move retry logic inside htable_lookup
      staging: lustre: fold lu_object_new() into lu_object_find_at()
      staging: lustre: llite: use more private data in dump_pgcache
      staging: lustre: llite: remove redundant lookup in dump_pgcache
      staging: lustre: move misc-device registration closer to related code.
      staging: lustre: move remaining code from linux-module.c to module.c
      staging: lustre: fix error deref in ll_splice_alias().


 .../staging/lustre/include/linux/libcfs/libcfs.h   |    6 -
 drivers/staging/lustre/lnet/libcfs/Makefile        |    1 
 .../lustre/lnet/libcfs/linux/linux-module.c        |  196 --------------------
 drivers/staging/lustre/lnet/libcfs/module.c        |  162 ++++++++++++++++-
 drivers/staging/lustre/lustre/include/lu_object.h  |   36 ----
 drivers/staging/lustre/lustre/include/lustre_dlm.h |    5 -
 drivers/staging/lustre/lustre/ldlm/ldlm_resource.c |    5 +
 drivers/staging/lustre/lustre/llite/lcommon_cl.c   |    8 -
 drivers/staging/lustre/lustre/llite/namei.c        |    8 +
 drivers/staging/lustre/lustre/llite/vvp_dev.c      |  169 ++++++++---------
 drivers/staging/lustre/lustre/lov/lov_object.c     |    8 -
 drivers/staging/lustre/lustre/obdclass/lu_object.c |  169 ++++++++---------
 12 files changed, 341 insertions(+), 432 deletions(-)
 delete mode 100644 drivers/staging/lustre/lnet/libcfs/linux/linux-module.c

--
Signature

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 05/10] staging: lustre: fold lu_object_new() into lu_object_find_at()
  2018-05-01  3:52 [PATCH 00/10] staging: lustre: assorted improvements NeilBrown
                   ` (5 preceding siblings ...)
  2018-05-01  3:52 ` [PATCH 04/10] staging: lustre: lu_object: move retry logic inside htable_lookup NeilBrown
@ 2018-05-01  3:52 ` NeilBrown
  2018-05-01  3:52 ` [PATCH 10/10] staging: lustre: fix error deref in ll_splice_alias() NeilBrown
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 28+ messages in thread
From: NeilBrown @ 2018-05-01  3:52 UTC (permalink / raw)
  To: Oleg Drokin, Greg Kroah-Hartman, James Simmons, Andreas Dilger
  Cc: Linux Kernel Mailing List, Lustre Development List

lu_object_new() duplicates a lot of code that is in
lu_object_find_at().
There is no real need for a separate function; it is simpler just
to skip the bits of lu_object_find_at() that we don't
want in the LOC_F_NEW case.

Signed-off-by: NeilBrown <neilb@suse.com>
---
 drivers/staging/lustre/lustre/obdclass/lu_object.c |   44 +++++---------------
 1 file changed, 12 insertions(+), 32 deletions(-)

diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index 93daa52e2535..9721b3af8ea8 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -678,29 +678,6 @@ static void lu_object_limit(const struct lu_env *env, struct lu_device *dev)
 			      false);
 }
 
-static struct lu_object *lu_object_new(const struct lu_env *env,
-				       struct lu_device *dev,
-				       const struct lu_fid *f,
-				       const struct lu_object_conf *conf)
-{
-	struct lu_object	*o;
-	struct cfs_hash	      *hs;
-	struct cfs_hash_bd	    bd;
-
-	o = lu_object_alloc(env, dev, f, conf);
-	if (IS_ERR(o))
-		return o;
-
-	hs = dev->ld_site->ls_obj_hash;
-	cfs_hash_bd_get_and_lock(hs, (void *)f, &bd, 1);
-	cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
-	cfs_hash_bd_unlock(hs, &bd, 1);
-
-	lu_object_limit(env, dev);
-
-	return o;
-}
-
 /**
  * Much like lu_object_find(), but top level device of object is specifically
  * \a dev rather than top level device of the site. This interface allows
@@ -736,18 +713,18 @@ struct lu_object *lu_object_find_at(const struct lu_env *env,
 	 * just alloc and insert directly.
 	 *
 	 */
-	if (conf && conf->loc_flags & LOC_F_NEW)
-		return lu_object_new(env, dev, f, conf);
-
 	s  = dev->ld_site;
 	hs = s->ls_obj_hash;
-	cfs_hash_bd_get_and_lock(hs, (void *)f, &bd, 0);
-	o = htable_lookup(s, &bd, f, &version);
-	cfs_hash_bd_unlock(hs, &bd, 0);
 
-	if (!IS_ERR(o) || PTR_ERR(o) != -ENOENT)
-		return o;
+	cfs_hash_bd_get(hs, f, &bd);
+	if (!(conf && conf->loc_flags & LOC_F_NEW)) {
+		cfs_hash_bd_lock(hs, &bd, 0);
+		o = htable_lookup(s, &bd, f, &version);
+		cfs_hash_bd_unlock(hs, &bd, 0);
 
+		if (!IS_ERR(o) || PTR_ERR(o) != -ENOENT)
+			return o;
+	}
 	/*
 	 * Allocate new object. This may result in rather complicated
 	 * operations, including fld queries, inode loading, etc.
@@ -760,7 +737,10 @@ struct lu_object *lu_object_find_at(const struct lu_env *env,
 
 	cfs_hash_bd_lock(hs, &bd, 1);
 
-	shadow = htable_lookup(s, &bd, f, &version);
+	if (conf && conf->loc_flags & LOC_F_NEW)
+		shadow = ERR_PTR(-ENOENT);
+	else
+		shadow = htable_lookup(s, &bd, f, &version);
 	if (likely(PTR_ERR(shadow) == -ENOENT)) {
 		cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
 		cfs_hash_bd_unlock(hs, &bd, 1);

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 06/10] staging: lustre: llite: use more private data in dump_pgcache
  2018-05-01  3:52 [PATCH 00/10] staging: lustre: assorted improvements NeilBrown
                   ` (7 preceding siblings ...)
  2018-05-01  3:52 ` [PATCH 10/10] staging: lustre: fix error deref in ll_splice_alias() NeilBrown
@ 2018-05-01  3:52 ` NeilBrown
  2018-05-01  3:52 ` [PATCH 09/10] staging: lustre: move remaining code from linux-module.c to module.c NeilBrown
  9 siblings, 0 replies; 28+ messages in thread
From: NeilBrown @ 2018-05-01  3:52 UTC (permalink / raw)
  To: Oleg Drokin, Greg Kroah-Hartman, James Simmons, Andreas Dilger
  Cc: Linux Kernel Mailing List, Lustre Development List

The dump_page_cache debugfs file allocates and frees an 'env' in each
call to vvp_pgcache_start, next, and show.  This is likely to be fast, but
does introduce the need to check for errors.

It is reasonable to allocate a single 'env' when the file is opened,
and use that throughout.

So create 'seq_private' structure which stores the sbi, env, and
refcheck, and attach this to the seqfile.

Then use it throughout instead of allocating 'env' repeatedly.

Signed-off-by: NeilBrown <neilb@suse.com>
---
 drivers/staging/lustre/lustre/llite/vvp_dev.c |  150 ++++++++++++-------------
 1 file changed, 72 insertions(+), 78 deletions(-)

diff --git a/drivers/staging/lustre/lustre/llite/vvp_dev.c b/drivers/staging/lustre/lustre/llite/vvp_dev.c
index 987c03b058e6..a2619dc04a7f 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_dev.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_dev.c
@@ -390,6 +390,12 @@ struct vvp_pgcache_id {
 	struct lu_object_header *vpi_obj;
 };
 
+struct seq_private {
+	struct ll_sb_info	*sbi;
+	struct lu_env		*env;
+	u16			refcheck;
+};
+
 static void vvp_pgcache_id_unpack(loff_t pos, struct vvp_pgcache_id *id)
 {
 	BUILD_BUG_ON(sizeof(pos) != sizeof(__u64));
@@ -531,95 +537,71 @@ static void vvp_pgcache_page_show(const struct lu_env *env,
 
 static int vvp_pgcache_show(struct seq_file *f, void *v)
 {
+	struct seq_private	*priv = f->private;
 	loff_t		   pos;
-	struct ll_sb_info       *sbi;
 	struct cl_object	*clob;
-	struct lu_env	   *env;
 	struct vvp_pgcache_id    id;
-	u16 refcheck;
-	int		      result;
 
-	env = cl_env_get(&refcheck);
-	if (!IS_ERR(env)) {
-		pos = *(loff_t *)v;
-		vvp_pgcache_id_unpack(pos, &id);
-		sbi = f->private;
-		clob = vvp_pgcache_obj(env, &sbi->ll_cl->cd_lu_dev, &id);
-		if (clob) {
-			struct inode *inode = vvp_object_inode(clob);
-			struct cl_page *page = NULL;
-			struct page *vmpage;
-
-			result = find_get_pages_contig(inode->i_mapping,
-						       id.vpi_index, 1,
-						       &vmpage);
-			if (result > 0) {
-				lock_page(vmpage);
-				page = cl_vmpage_page(vmpage, clob);
-				unlock_page(vmpage);
-				put_page(vmpage);
-			}
+	pos = *(loff_t *)v;
+	vvp_pgcache_id_unpack(pos, &id);
+	clob = vvp_pgcache_obj(priv->env, &priv->sbi->ll_cl->cd_lu_dev, &id);
+	if (clob) {
+		struct inode *inode = vvp_object_inode(clob);
+		struct cl_page *page = NULL;
+		struct page *vmpage;
+		int result;
+
+		result = find_get_pages_contig(inode->i_mapping,
+					       id.vpi_index, 1,
+					       &vmpage);
+		if (result > 0) {
+			lock_page(vmpage);
+			page = cl_vmpage_page(vmpage, clob);
+			unlock_page(vmpage);
+			put_page(vmpage);
+		}
 
-			seq_printf(f, "%8x@" DFID ": ", id.vpi_index,
-				   PFID(lu_object_fid(&clob->co_lu)));
-			if (page) {
-				vvp_pgcache_page_show(env, f, page);
-				cl_page_put(env, page);
-			} else {
-				seq_puts(f, "missing\n");
-			}
-			lu_object_ref_del(&clob->co_lu, "dump", current);
-			cl_object_put(env, clob);
+		seq_printf(f, "%8x@" DFID ": ", id.vpi_index,
+			   PFID(lu_object_fid(&clob->co_lu)));
+		if (page) {
+			vvp_pgcache_page_show(priv->env, f, page);
+			cl_page_put(priv->env, page);
 		} else {
-			seq_printf(f, "%llx missing\n", pos);
+			seq_puts(f, "missing\n");
 		}
-		cl_env_put(env, &refcheck);
-		result = 0;
+		lu_object_ref_del(&clob->co_lu, "dump", current);
+		cl_object_put(priv->env, clob);
 	} else {
-		result = PTR_ERR(env);
+		seq_printf(f, "%llx missing\n", pos);
 	}
-	return result;
+	return 0;
 }
 
 static void *vvp_pgcache_start(struct seq_file *f, loff_t *pos)
 {
-	struct ll_sb_info *sbi;
-	struct lu_env     *env;
-	u16 refcheck;
-
-	sbi = f->private;
+	struct seq_private	*priv = f->private;
 
-	env = cl_env_get(&refcheck);
-	if (!IS_ERR(env)) {
-		sbi = f->private;
-		if (sbi->ll_site->ls_obj_hash->hs_cur_bits >
-		    64 - PGC_OBJ_SHIFT) {
-			pos = ERR_PTR(-EFBIG);
-		} else {
-			*pos = vvp_pgcache_find(env, &sbi->ll_cl->cd_lu_dev,
-						*pos);
-			if (*pos == ~0ULL)
-				pos = NULL;
-		}
-		cl_env_put(env, &refcheck);
+	if (priv->sbi->ll_site->ls_obj_hash->hs_cur_bits >
+	    64 - PGC_OBJ_SHIFT) {
+		pos = ERR_PTR(-EFBIG);
+	} else {
+		*pos = vvp_pgcache_find(priv->env, &priv->sbi->ll_cl->cd_lu_dev,
+					*pos);
+		if (*pos == ~0ULL)
+			pos = NULL;
 	}
+
 	return pos;
 }
 
 static void *vvp_pgcache_next(struct seq_file *f, void *v, loff_t *pos)
 {
-	struct ll_sb_info *sbi;
-	struct lu_env     *env;
-	u16 refcheck;
+	struct seq_private *priv = f->private;
+
+	*pos = vvp_pgcache_find(priv->env, &priv->sbi->ll_cl->cd_lu_dev, *pos + 1);
+	if (*pos == ~0ULL)
+		pos = NULL;
 
-	env = cl_env_get(&refcheck);
-	if (!IS_ERR(env)) {
-		sbi = f->private;
-		*pos = vvp_pgcache_find(env, &sbi->ll_cl->cd_lu_dev, *pos + 1);
-		if (*pos == ~0ULL)
-			pos = NULL;
-		cl_env_put(env, &refcheck);
-	}
 	return pos;
 }
 
@@ -637,17 +619,29 @@ static const struct seq_operations vvp_pgcache_ops = {
 
 static int vvp_dump_pgcache_seq_open(struct inode *inode, struct file *filp)
 {
-	struct seq_file *seq;
-	int rc;
-
-	rc = seq_open(filp, &vvp_pgcache_ops);
-	if (rc)
-		return rc;
+	struct seq_private *priv;
+
+	priv = __seq_open_private(filp, &vvp_pgcache_ops, sizeof(*priv));
+	if (!priv)
+		return -ENOMEM;
+
+	priv->sbi = inode->i_private;
+	priv->env = cl_env_get(&priv->refcheck);
+	if (IS_ERR(priv->env)) {
+		int err = PTR_ERR(priv->env);
+		seq_release_private(inode, filp);
+		return err;
+	}
+	return 0;
+}
 
-	seq = filp->private_data;
-	seq->private = inode->i_private;
+static int vvp_dump_pgcache_seq_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq = file->private_data;
+	struct seq_private *priv = seq->private;
 
-	return 0;
+	cl_env_put(priv->env, &priv->refcheck);
+	return seq_release_private(inode, file);
 }
 
 const struct file_operations vvp_dump_pgcache_file_ops = {
@@ -655,5 +649,5 @@ const struct file_operations vvp_dump_pgcache_file_ops = {
 	.open    = vvp_dump_pgcache_seq_open,
 	.read    = seq_read,
 	.llseek	 = seq_lseek,
-	.release = seq_release,
+	.release = vvp_dump_pgcache_seq_release,
 };

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 07/10] staging: lustre: llite: remove redundant lookup in dump_pgcache
  2018-05-01  3:52 [PATCH 00/10] staging: lustre: assorted improvements NeilBrown
                   ` (2 preceding siblings ...)
  2018-05-01  3:52 ` [PATCH 03/10] staging: lustre: lu_object: discard extra lru count NeilBrown
@ 2018-05-01  3:52 ` NeilBrown
  2018-05-01  3:52 ` [PATCH 08/10] staging: lustre: move misc-device registration closer to related code NeilBrown
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 28+ messages in thread
From: NeilBrown @ 2018-05-01  3:52 UTC (permalink / raw)
  To: Oleg Drokin, Greg Kroah-Hartman, James Simmons, Andreas Dilger
  Cc: Linux Kernel Mailing List, Lustre Development List

Both the 'next' and the 'show' functions for the dump_page_cache
seqfile perform a lookup based on the current file index.  This is
needless duplication.

The reason appears to be that the state that needs to be communicated
from "next" to "show" is two pointers, but seq_file only provides for
a single pointer to be returned from next and passed to show.

So make use of the new 'seq_private' structure to store the extra
pointer.
So when 'next' (or 'start') find something, it returns the page and
stores the clob in the private area.
'show' accepts the page as an argument, and finds the clob where it
was stored.

Signed-off-by: NeilBrown <neilb@suse.com>
---
 drivers/staging/lustre/lustre/llite/vvp_dev.c |   97 +++++++++++--------------
 1 file changed, 41 insertions(+), 56 deletions(-)

diff --git a/drivers/staging/lustre/lustre/llite/vvp_dev.c b/drivers/staging/lustre/lustre/llite/vvp_dev.c
index a2619dc04a7f..39a85e967368 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_dev.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_dev.c
@@ -394,6 +394,7 @@ struct seq_private {
 	struct ll_sb_info	*sbi;
 	struct lu_env		*env;
 	u16			refcheck;
+	struct cl_object	*clob;
 };
 
 static void vvp_pgcache_id_unpack(loff_t pos, struct vvp_pgcache_id *id)
@@ -458,19 +459,20 @@ static struct cl_object *vvp_pgcache_obj(const struct lu_env *env,
 	return NULL;
 }
 
-static loff_t vvp_pgcache_find(const struct lu_env *env,
-			       struct lu_device *dev, loff_t pos)
+static struct page *vvp_pgcache_find(const struct lu_env *env,
+				     struct lu_device *dev,
+				     struct cl_object **clobp, loff_t *pos)
 {
 	struct cl_object     *clob;
 	struct lu_site       *site;
 	struct vvp_pgcache_id id;
 
 	site = dev->ld_site;
-	vvp_pgcache_id_unpack(pos, &id);
+	vvp_pgcache_id_unpack(*pos, &id);
 
 	while (1) {
 		if (id.vpi_bucket >= CFS_HASH_NHLIST(site->ls_obj_hash))
-			return ~0ULL;
+			return NULL;
 		clob = vvp_pgcache_obj(env, dev, &id);
 		if (clob) {
 			struct inode *inode = vvp_object_inode(clob);
@@ -482,20 +484,22 @@ static loff_t vvp_pgcache_find(const struct lu_env *env,
 			if (nr > 0) {
 				id.vpi_index = vmpage->index;
 				/* Cant support over 16T file */
-				nr = !(vmpage->index > 0xffffffff);
+				if (vmpage->index <= 0xffffffff) {
+					*clobp = clob;
+					*pos = vvp_pgcache_id_pack(&id);
+					return vmpage;
+				}
 				put_page(vmpage);
 			}
 
 			lu_object_ref_del(&clob->co_lu, "dump", current);
 			cl_object_put(env, clob);
-			if (nr > 0)
-				return vvp_pgcache_id_pack(&id);
 		}
 		/* to the next object. */
 		++id.vpi_depth;
 		id.vpi_depth &= 0xf;
 		if (id.vpi_depth == 0 && ++id.vpi_bucket == 0)
-			return ~0ULL;
+			return NULL;
 		id.vpi_index = 0;
 	}
 }
@@ -538,71 +542,52 @@ static void vvp_pgcache_page_show(const struct lu_env *env,
 static int vvp_pgcache_show(struct seq_file *f, void *v)
 {
 	struct seq_private	*priv = f->private;
-	loff_t		   pos;
-	struct cl_object	*clob;
-	struct vvp_pgcache_id    id;
-
-	pos = *(loff_t *)v;
-	vvp_pgcache_id_unpack(pos, &id);
-	clob = vvp_pgcache_obj(priv->env, &priv->sbi->ll_cl->cd_lu_dev, &id);
-	if (clob) {
-		struct inode *inode = vvp_object_inode(clob);
-		struct cl_page *page = NULL;
-		struct page *vmpage;
-		int result;
-
-		result = find_get_pages_contig(inode->i_mapping,
-					       id.vpi_index, 1,
-					       &vmpage);
-		if (result > 0) {
-			lock_page(vmpage);
-			page = cl_vmpage_page(vmpage, clob);
-			unlock_page(vmpage);
-			put_page(vmpage);
-		}
-
-		seq_printf(f, "%8x@" DFID ": ", id.vpi_index,
-			   PFID(lu_object_fid(&clob->co_lu)));
-		if (page) {
-			vvp_pgcache_page_show(priv->env, f, page);
-			cl_page_put(priv->env, page);
-		} else {
-			seq_puts(f, "missing\n");
-		}
-		lu_object_ref_del(&clob->co_lu, "dump", current);
-		cl_object_put(priv->env, clob);
+	struct page		*vmpage = v;
+	struct cl_page		*page;
+
+	seq_printf(f, "%8lx@" DFID ": ", vmpage->index,
+		   PFID(lu_object_fid(&priv->clob->co_lu)));
+	lock_page(vmpage);
+	page = cl_vmpage_page(vmpage, priv->clob);
+	unlock_page(vmpage);
+	put_page(vmpage);
+
+	if (page) {
+		vvp_pgcache_page_show(priv->env, f, page);
+		cl_page_put(priv->env, page);
 	} else {
-		seq_printf(f, "%llx missing\n", pos);
+		seq_puts(f, "missing\n");
 	}
+	lu_object_ref_del(&priv->clob->co_lu, "dump", current);
+	cl_object_put(priv->env, priv->clob);
+
 	return 0;
 }
 
 static void *vvp_pgcache_start(struct seq_file *f, loff_t *pos)
 {
 	struct seq_private	*priv = f->private;
+	struct page *ret;
 
 	if (priv->sbi->ll_site->ls_obj_hash->hs_cur_bits >
-	    64 - PGC_OBJ_SHIFT) {
-		pos = ERR_PTR(-EFBIG);
-	} else {
-		*pos = vvp_pgcache_find(priv->env, &priv->sbi->ll_cl->cd_lu_dev,
-					*pos);
-		if (*pos == ~0ULL)
-			pos = NULL;
-	}
+	    64 - PGC_OBJ_SHIFT)
+		ret = ERR_PTR(-EFBIG);
+	else
+		ret = vvp_pgcache_find(priv->env, &priv->sbi->ll_cl->cd_lu_dev,
+				       &priv->clob, pos);
 
-	return pos;
+	return ret;
 }
 
 static void *vvp_pgcache_next(struct seq_file *f, void *v, loff_t *pos)
 {
 	struct seq_private *priv = f->private;
+	struct page *ret;
 
-	*pos = vvp_pgcache_find(priv->env, &priv->sbi->ll_cl->cd_lu_dev, *pos + 1);
-	if (*pos == ~0ULL)
-		pos = NULL;
-
-	return pos;
+	*pos += 1;
+	ret = vvp_pgcache_find(priv->env, &priv->sbi->ll_cl->cd_lu_dev,
+			       &priv->clob, pos);
+	return ret;
 }
 
 static void vvp_pgcache_stop(struct seq_file *f, void *v)

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 08/10] staging: lustre: move misc-device registration closer to related code.
  2018-05-01  3:52 [PATCH 00/10] staging: lustre: assorted improvements NeilBrown
                   ` (3 preceding siblings ...)
  2018-05-01  3:52 ` [PATCH 07/10] staging: lustre: llite: remove redundant lookup in dump_pgcache NeilBrown
@ 2018-05-01  3:52 ` NeilBrown
  2018-05-02 18:12   ` James Simmons
  2018-05-01  3:52 ` [PATCH 04/10] staging: lustre: lu_object: move retry logic inside htable_lookup NeilBrown
                   ` (4 subsequent siblings)
  9 siblings, 1 reply; 28+ messages in thread
From: NeilBrown @ 2018-05-01  3:52 UTC (permalink / raw)
  To: Oleg Drokin, Greg Kroah-Hartman, James Simmons, Andreas Dilger
  Cc: Linux Kernel Mailing List, Lustre Development List

The ioctl handler for the misc device is in lnet/libcfs/module.c
but it is registered in lnet/libcfs/linux/linux-module.c.

Keeping related code together makes maintenance easier, so move the
code.

Signed-off-by: NeilBrown <neilb@suse.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
---
 .../staging/lustre/include/linux/libcfs/libcfs.h   |    2 -
 .../lustre/lnet/libcfs/linux/linux-module.c        |   28 ------------------
 drivers/staging/lustre/lnet/libcfs/module.c        |   31 +++++++++++++++++++-
 3 files changed, 30 insertions(+), 31 deletions(-)

diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs.h b/drivers/staging/lustre/include/linux/libcfs/libcfs.h
index 6e7754b2f296..9263e151451b 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs.h
@@ -141,11 +141,9 @@ int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand);
 int libcfs_ioctl_getdata(struct libcfs_ioctl_hdr **hdr_pp,
 			 const struct libcfs_ioctl_hdr __user *uparam);
 int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data);
-int libcfs_ioctl(unsigned long cmd, void __user *arg);
 
 #define _LIBCFS_H
 
-extern struct miscdevice libcfs_dev;
 /**
  * The path of debug log dump upcall script.
  */
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c
index c8908e816c4c..954b681f9db7 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c
@@ -166,31 +166,3 @@ int libcfs_ioctl_getdata(struct libcfs_ioctl_hdr **hdr_pp,
 	kvfree(*hdr_pp);
 	return err;
 }
-
-static long
-libcfs_psdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	if (!capable(CAP_SYS_ADMIN))
-		return -EACCES;
-
-	if (_IOC_TYPE(cmd) != IOC_LIBCFS_TYPE ||
-	    _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR  ||
-	    _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR) {
-		CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n",
-		       _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
-		return -EINVAL;
-	}
-
-	return libcfs_ioctl(cmd, (void __user *)arg);
-}
-
-static const struct file_operations libcfs_fops = {
-	.owner		= THIS_MODULE,
-	.unlocked_ioctl	= libcfs_psdev_ioctl,
-};
-
-struct miscdevice libcfs_dev = {
-	.minor = MISC_DYNAMIC_MINOR,
-	.name = "lnet",
-	.fops = &libcfs_fops,
-};
diff --git a/drivers/staging/lustre/lnet/libcfs/module.c b/drivers/staging/lustre/lnet/libcfs/module.c
index 4b9acd7bc5cf..3fb150a57f49 100644
--- a/drivers/staging/lustre/lnet/libcfs/module.c
+++ b/drivers/staging/lustre/lnet/libcfs/module.c
@@ -95,7 +95,7 @@ int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand)
 }
 EXPORT_SYMBOL(libcfs_deregister_ioctl);
 
-int libcfs_ioctl(unsigned long cmd, void __user *uparam)
+static int libcfs_ioctl(unsigned long cmd, void __user *uparam)
 {
 	struct libcfs_ioctl_data *data = NULL;
 	struct libcfs_ioctl_hdr *hdr;
@@ -161,6 +161,35 @@ int libcfs_ioctl(unsigned long cmd, void __user *uparam)
 	return err;
 }
 
+
+static long
+libcfs_psdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	if (_IOC_TYPE(cmd) != IOC_LIBCFS_TYPE ||
+	    _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR  ||
+	    _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR) {
+		CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n",
+		       _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
+		return -EINVAL;
+	}
+
+	return libcfs_ioctl(cmd, (void __user *)arg);
+}
+
+static const struct file_operations libcfs_fops = {
+	.owner		= THIS_MODULE,
+	.unlocked_ioctl	= libcfs_psdev_ioctl,
+};
+
+struct miscdevice libcfs_dev = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "lnet",
+	.fops = &libcfs_fops,
+};
+
 int lprocfs_call_handler(void *data, int write, loff_t *ppos,
 			 void __user *buffer, size_t *lenp,
 			 int (*handler)(void *data, int write, loff_t pos,

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 09/10] staging: lustre: move remaining code from linux-module.c to module.c
  2018-05-01  3:52 [PATCH 00/10] staging: lustre: assorted improvements NeilBrown
                   ` (8 preceding siblings ...)
  2018-05-01  3:52 ` [PATCH 06/10] staging: lustre: llite: use more private data in dump_pgcache NeilBrown
@ 2018-05-01  3:52 ` NeilBrown
  2018-05-02 18:13   ` James Simmons
  9 siblings, 1 reply; 28+ messages in thread
From: NeilBrown @ 2018-05-01  3:52 UTC (permalink / raw)
  To: Oleg Drokin, Greg Kroah-Hartman, James Simmons, Andreas Dilger
  Cc: Linux Kernel Mailing List, Lustre Development List

There is no longer any need to keep this code separate,
and now we can remove linux-module.c

Signed-off-by: NeilBrown <neilb@suse.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
---
 .../staging/lustre/include/linux/libcfs/libcfs.h   |    4 
 drivers/staging/lustre/lnet/libcfs/Makefile        |    1 
 .../lustre/lnet/libcfs/linux/linux-module.c        |  168 --------------------
 drivers/staging/lustre/lnet/libcfs/module.c        |  131 ++++++++++++++++
 4 files changed, 131 insertions(+), 173 deletions(-)
 delete mode 100644 drivers/staging/lustre/lnet/libcfs/linux/linux-module.c

diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs.h b/drivers/staging/lustre/include/linux/libcfs/libcfs.h
index 9263e151451b..d420449b620e 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs.h
@@ -138,10 +138,6 @@ struct libcfs_ioctl_handler {
 int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand);
 int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand);
 
-int libcfs_ioctl_getdata(struct libcfs_ioctl_hdr **hdr_pp,
-			 const struct libcfs_ioctl_hdr __user *uparam);
-int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data);
-
 #define _LIBCFS_H
 
 /**
diff --git a/drivers/staging/lustre/lnet/libcfs/Makefile b/drivers/staging/lustre/lnet/libcfs/Makefile
index e6fda27fdabd..e73515789a11 100644
--- a/drivers/staging/lustre/lnet/libcfs/Makefile
+++ b/drivers/staging/lustre/lnet/libcfs/Makefile
@@ -5,7 +5,6 @@ subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
 obj-$(CONFIG_LNET) += libcfs.o
 
 libcfs-linux-objs := linux-tracefile.o linux-debug.o
-libcfs-linux-objs += linux-module.o
 libcfs-linux-objs += linux-crypto.o
 libcfs-linux-objs += linux-crypto-adler.o
 
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c
deleted file mode 100644
index 954b681f9db7..000000000000
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c
+++ /dev/null
@@ -1,168 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LNET
-
-#include <linux/miscdevice.h>
-#include <linux/libcfs/libcfs.h>
-
-static inline size_t libcfs_ioctl_packlen(struct libcfs_ioctl_data *data)
-{
-	size_t len = sizeof(*data);
-
-	len += cfs_size_round(data->ioc_inllen1);
-	len += cfs_size_round(data->ioc_inllen2);
-	return len;
-}
-
-static inline bool libcfs_ioctl_is_invalid(struct libcfs_ioctl_data *data)
-{
-	if (data->ioc_hdr.ioc_len > BIT(30)) {
-		CERROR("LIBCFS ioctl: ioc_len larger than 1<<30\n");
-		return true;
-	}
-	if (data->ioc_inllen1 > BIT(30)) {
-		CERROR("LIBCFS ioctl: ioc_inllen1 larger than 1<<30\n");
-		return true;
-	}
-	if (data->ioc_inllen2 > BIT(30)) {
-		CERROR("LIBCFS ioctl: ioc_inllen2 larger than 1<<30\n");
-		return true;
-	}
-	if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
-		CERROR("LIBCFS ioctl: inlbuf1 pointer but 0 length\n");
-		return true;
-	}
-	if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
-		CERROR("LIBCFS ioctl: inlbuf2 pointer but 0 length\n");
-		return true;
-	}
-	if (data->ioc_pbuf1 && !data->ioc_plen1) {
-		CERROR("LIBCFS ioctl: pbuf1 pointer but 0 length\n");
-		return true;
-	}
-	if (data->ioc_pbuf2 && !data->ioc_plen2) {
-		CERROR("LIBCFS ioctl: pbuf2 pointer but 0 length\n");
-		return true;
-	}
-	if (data->ioc_plen1 && !data->ioc_pbuf1) {
-		CERROR("LIBCFS ioctl: plen1 nonzero but no pbuf1 pointer\n");
-		return true;
-	}
-	if (data->ioc_plen2 && !data->ioc_pbuf2) {
-		CERROR("LIBCFS ioctl: plen2 nonzero but no pbuf2 pointer\n");
-		return true;
-	}
-	if ((u32)libcfs_ioctl_packlen(data) != data->ioc_hdr.ioc_len) {
-		CERROR("LIBCFS ioctl: packlen != ioc_len\n");
-		return true;
-	}
-	if (data->ioc_inllen1 &&
-	    data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') {
-		CERROR("LIBCFS ioctl: inlbuf1 not 0 terminated\n");
-		return true;
-	}
-	if (data->ioc_inllen2 &&
-	    data->ioc_bulk[cfs_size_round(data->ioc_inllen1) +
-			   data->ioc_inllen2 - 1] != '\0') {
-		CERROR("LIBCFS ioctl: inlbuf2 not 0 terminated\n");
-		return true;
-	}
-	return false;
-}
-
-int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data)
-{
-	if (libcfs_ioctl_is_invalid(data)) {
-		CERROR("libcfs ioctl: parameter not correctly formatted\n");
-		return -EINVAL;
-	}
-
-	if (data->ioc_inllen1)
-		data->ioc_inlbuf1 = &data->ioc_bulk[0];
-
-	if (data->ioc_inllen2)
-		data->ioc_inlbuf2 = &data->ioc_bulk[0] +
-			cfs_size_round(data->ioc_inllen1);
-
-	return 0;
-}
-
-int libcfs_ioctl_getdata(struct libcfs_ioctl_hdr **hdr_pp,
-			 const struct libcfs_ioctl_hdr __user *uhdr)
-{
-	struct libcfs_ioctl_hdr hdr;
-	int err;
-
-	if (copy_from_user(&hdr, uhdr, sizeof(hdr)))
-		return -EFAULT;
-
-	if (hdr.ioc_version != LIBCFS_IOCTL_VERSION &&
-	    hdr.ioc_version != LIBCFS_IOCTL_VERSION2) {
-		CERROR("libcfs ioctl: version mismatch expected %#x, got %#x\n",
-		       LIBCFS_IOCTL_VERSION, hdr.ioc_version);
-		return -EINVAL;
-	}
-
-	if (hdr.ioc_len < sizeof(hdr)) {
-		CERROR("libcfs ioctl: user buffer too small for ioctl\n");
-		return -EINVAL;
-	}
-
-	if (hdr.ioc_len > LIBCFS_IOC_DATA_MAX) {
-		CERROR("libcfs ioctl: user buffer is too large %d/%d\n",
-		       hdr.ioc_len, LIBCFS_IOC_DATA_MAX);
-		return -EINVAL;
-	}
-
-	*hdr_pp = kvmalloc(hdr.ioc_len, GFP_KERNEL);
-	if (!*hdr_pp)
-		return -ENOMEM;
-
-	if (copy_from_user(*hdr_pp, uhdr, hdr.ioc_len)) {
-		err = -EFAULT;
-		goto free;
-	}
-
-	if ((*hdr_pp)->ioc_version != hdr.ioc_version ||
-	    (*hdr_pp)->ioc_len != hdr.ioc_len) {
-		err = -EINVAL;
-		goto free;
-	}
-
-	return 0;
-
-free:
-	kvfree(*hdr_pp);
-	return err;
-}
diff --git a/drivers/staging/lustre/lnet/libcfs/module.c b/drivers/staging/lustre/lnet/libcfs/module.c
index 3fb150a57f49..4c4bf09a78dd 100644
--- a/drivers/staging/lustre/lnet/libcfs/module.c
+++ b/drivers/staging/lustre/lnet/libcfs/module.c
@@ -95,6 +95,137 @@ int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand)
 }
 EXPORT_SYMBOL(libcfs_deregister_ioctl);
 
+static inline size_t libcfs_ioctl_packlen(struct libcfs_ioctl_data *data)
+{
+	size_t len = sizeof(*data);
+
+	len += cfs_size_round(data->ioc_inllen1);
+	len += cfs_size_round(data->ioc_inllen2);
+	return len;
+}
+
+static inline bool libcfs_ioctl_is_invalid(struct libcfs_ioctl_data *data)
+{
+	if (data->ioc_hdr.ioc_len > BIT(30)) {
+		CERROR("LIBCFS ioctl: ioc_len larger than 1<<30\n");
+		return true;
+	}
+	if (data->ioc_inllen1 > BIT(30)) {
+		CERROR("LIBCFS ioctl: ioc_inllen1 larger than 1<<30\n");
+		return true;
+	}
+	if (data->ioc_inllen2 > BIT(30)) {
+		CERROR("LIBCFS ioctl: ioc_inllen2 larger than 1<<30\n");
+		return true;
+	}
+	if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
+		CERROR("LIBCFS ioctl: inlbuf1 pointer but 0 length\n");
+		return true;
+	}
+	if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
+		CERROR("LIBCFS ioctl: inlbuf2 pointer but 0 length\n");
+		return true;
+	}
+	if (data->ioc_pbuf1 && !data->ioc_plen1) {
+		CERROR("LIBCFS ioctl: pbuf1 pointer but 0 length\n");
+		return true;
+	}
+	if (data->ioc_pbuf2 && !data->ioc_plen2) {
+		CERROR("LIBCFS ioctl: pbuf2 pointer but 0 length\n");
+		return true;
+	}
+	if (data->ioc_plen1 && !data->ioc_pbuf1) {
+		CERROR("LIBCFS ioctl: plen1 nonzero but no pbuf1 pointer\n");
+		return true;
+	}
+	if (data->ioc_plen2 && !data->ioc_pbuf2) {
+		CERROR("LIBCFS ioctl: plen2 nonzero but no pbuf2 pointer\n");
+		return true;
+	}
+	if ((u32)libcfs_ioctl_packlen(data) != data->ioc_hdr.ioc_len) {
+		CERROR("LIBCFS ioctl: packlen != ioc_len\n");
+		return true;
+	}
+	if (data->ioc_inllen1 &&
+	    data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') {
+		CERROR("LIBCFS ioctl: inlbuf1 not 0 terminated\n");
+		return true;
+	}
+	if (data->ioc_inllen2 &&
+	    data->ioc_bulk[cfs_size_round(data->ioc_inllen1) +
+			   data->ioc_inllen2 - 1] != '\0') {
+		CERROR("LIBCFS ioctl: inlbuf2 not 0 terminated\n");
+		return true;
+	}
+	return false;
+}
+
+static int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data)
+{
+	if (libcfs_ioctl_is_invalid(data)) {
+		CERROR("libcfs ioctl: parameter not correctly formatted\n");
+		return -EINVAL;
+	}
+
+	if (data->ioc_inllen1)
+		data->ioc_inlbuf1 = &data->ioc_bulk[0];
+
+	if (data->ioc_inllen2)
+		data->ioc_inlbuf2 = &data->ioc_bulk[0] +
+			cfs_size_round(data->ioc_inllen1);
+
+	return 0;
+}
+
+static int libcfs_ioctl_getdata(struct libcfs_ioctl_hdr **hdr_pp,
+			 const struct libcfs_ioctl_hdr __user *uhdr)
+{
+	struct libcfs_ioctl_hdr hdr;
+	int err;
+
+	if (copy_from_user(&hdr, uhdr, sizeof(hdr)))
+		return -EFAULT;
+
+	if (hdr.ioc_version != LIBCFS_IOCTL_VERSION &&
+	    hdr.ioc_version != LIBCFS_IOCTL_VERSION2) {
+		CERROR("libcfs ioctl: version mismatch expected %#x, got %#x\n",
+		       LIBCFS_IOCTL_VERSION, hdr.ioc_version);
+		return -EINVAL;
+	}
+
+	if (hdr.ioc_len < sizeof(hdr)) {
+		CERROR("libcfs ioctl: user buffer too small for ioctl\n");
+		return -EINVAL;
+	}
+
+	if (hdr.ioc_len > LIBCFS_IOC_DATA_MAX) {
+		CERROR("libcfs ioctl: user buffer is too large %d/%d\n",
+		       hdr.ioc_len, LIBCFS_IOC_DATA_MAX);
+		return -EINVAL;
+	}
+
+	*hdr_pp = kvmalloc(hdr.ioc_len, GFP_KERNEL);
+	if (!*hdr_pp)
+		return -ENOMEM;
+
+	if (copy_from_user(*hdr_pp, uhdr, hdr.ioc_len)) {
+		err = -EFAULT;
+		goto free;
+	}
+
+	if ((*hdr_pp)->ioc_version != hdr.ioc_version ||
+	    (*hdr_pp)->ioc_len != hdr.ioc_len) {
+		err = -EINVAL;
+		goto free;
+	}
+
+	return 0;
+
+free:
+	kvfree(*hdr_pp);
+	return err;
+}
+
 static int libcfs_ioctl(unsigned long cmd, void __user *uparam)
 {
 	struct libcfs_ioctl_data *data = NULL;

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 10/10] staging: lustre: fix error deref in ll_splice_alias().
  2018-05-01  3:52 [PATCH 00/10] staging: lustre: assorted improvements NeilBrown
                   ` (6 preceding siblings ...)
  2018-05-01  3:52 ` [PATCH 05/10] staging: lustre: fold lu_object_new() into lu_object_find_at() NeilBrown
@ 2018-05-01  3:52 ` NeilBrown
  2018-05-02  3:05   ` James Simmons
  2018-05-01  3:52 ` [PATCH 06/10] staging: lustre: llite: use more private data in dump_pgcache NeilBrown
  2018-05-01  3:52 ` [PATCH 09/10] staging: lustre: move remaining code from linux-module.c to module.c NeilBrown
  9 siblings, 1 reply; 28+ messages in thread
From: NeilBrown @ 2018-05-01  3:52 UTC (permalink / raw)
  To: Oleg Drokin, Greg Kroah-Hartman, James Simmons, Andreas Dilger
  Cc: Linux Kernel Mailing List, Lustre Development List

d_splice_alias() can return an ERR_PTR().
If it does while debugging is enabled, the following
CDEBUG() will dereference that error and crash.

So add appropriate checking, and provide a separate
debug message for the error case.

Reported-by: James Simmons <jsimmons@infradead.org>
Fixes: e9d4f0b9f559 ("staging: lustre: llite: use d_splice_alias for directories.")
Signed-off-by: NeilBrown <neilb@suse.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Tested-by: James Simmons <jsimmons@infradead.org>
---
 drivers/staging/lustre/lustre/llite/namei.c |    8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c
index 6c9ec462eb41..24a6873d86a2 100644
--- a/drivers/staging/lustre/lustre/llite/namei.c
+++ b/drivers/staging/lustre/lustre/llite/namei.c
@@ -442,11 +442,15 @@ struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de)
 	} else {
 		struct dentry *new = d_splice_alias(inode, de);
 
+		if (IS_ERR(new))
+			CDEBUG(D_DENTRY, "splice inode %p as %pd gives error %lu\n",
+			       inode, de, PTR_ERR(new));
 		if (new)
 			de = new;
 	}
-	CDEBUG(D_DENTRY, "Add dentry %p inode %p refc %d flags %#x\n",
-	       de, d_inode(de), d_count(de), de->d_flags);
+	if (!IS_ERR(de))
+		CDEBUG(D_DENTRY, "Add dentry %p inode %p refc %d flags %#x\n",
+		       de, d_inode(de), d_count(de), de->d_flags);
 	return de;
 }
 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 04/10] staging: lustre: lu_object: move retry logic inside htable_lookup
  2018-05-01  3:52 [PATCH 00/10] staging: lustre: assorted improvements NeilBrown
                   ` (4 preceding siblings ...)
  2018-05-01  3:52 ` [PATCH 08/10] staging: lustre: move misc-device registration closer to related code NeilBrown
@ 2018-05-01  3:52 ` NeilBrown
  2018-05-01  8:22   ` [lustre-devel] " Dilger, Andreas
  2018-05-01  3:52 ` [PATCH 05/10] staging: lustre: fold lu_object_new() into lu_object_find_at() NeilBrown
                   ` (3 subsequent siblings)
  9 siblings, 1 reply; 28+ messages in thread
From: NeilBrown @ 2018-05-01  3:52 UTC (permalink / raw)
  To: Oleg Drokin, Greg Kroah-Hartman, James Simmons, Andreas Dilger
  Cc: Linux Kernel Mailing List, Lustre Development List

The current retry logic, to wait when a 'dying' object is found,
spans multiple functions.  The process is attached to a waitqueue
and set TASK_UNINTERRUPTIBLE in htable_lookup, and this status
is passed back through lu_object_find_try() to lu_object_find_at()
where schedule() is called and the process is removed from the queue.

This can be simplified by moving all the logic (including
hashtable locking) inside htable_lookup(), which now never returns
EAGAIN.

Note that htable_lookup() is called with the hash bucket lock
held, and will drop and retake it if it needs to schedule.

I made this a 'goto' loop rather than a 'while(1)' loop as the
diff is easier to read.

Signed-off-by: NeilBrown <neilb@suse.com>
---
 drivers/staging/lustre/lustre/obdclass/lu_object.c |   73 +++++++-------------
 1 file changed, 27 insertions(+), 46 deletions(-)

diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index 2bf089817157..93daa52e2535 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -586,16 +586,21 @@ EXPORT_SYMBOL(lu_object_print);
 static struct lu_object *htable_lookup(struct lu_site *s,
 				       struct cfs_hash_bd *bd,
 				       const struct lu_fid *f,
-				       wait_queue_entry_t *waiter,
 				       __u64 *version)
 {
+	struct cfs_hash		*hs = s->ls_obj_hash;
 	struct lu_site_bkt_data *bkt;
 	struct lu_object_header *h;
 	struct hlist_node	*hnode;
-	__u64  ver = cfs_hash_bd_version_get(bd);
+	__u64 ver;
+	wait_queue_entry_t waiter;
 
-	if (*version == ver)
+retry:
+	ver = cfs_hash_bd_version_get(bd);
+
+	if (*version == ver) {
 		return ERR_PTR(-ENOENT);
+	}
 
 	*version = ver;
 	bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, bd);
@@ -625,11 +630,15 @@ static struct lu_object *htable_lookup(struct lu_site *s,
 	 * drained), and moreover, lookup has to wait until object is freed.
 	 */
 
-	init_waitqueue_entry(waiter, current);
-	add_wait_queue(&bkt->lsb_marche_funebre, waiter);
+	init_waitqueue_entry(&waiter, current);
+	add_wait_queue(&bkt->lsb_marche_funebre, &waiter);
 	set_current_state(TASK_UNINTERRUPTIBLE);
 	lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_DEATH_RACE);
-	return ERR_PTR(-EAGAIN);
+	cfs_hash_bd_unlock(hs, bd, 1);
+	schedule();
+	remove_wait_queue(&bkt->lsb_marche_funebre, &waiter);
+	cfs_hash_bd_lock(hs, bd, 1);
+	goto retry;
 }
 
 /**
@@ -693,13 +702,14 @@ static struct lu_object *lu_object_new(const struct lu_env *env,
 }
 
 /**
- * Core logic of lu_object_find*() functions.
+ * Much like lu_object_find(), but top level device of object is specifically
+ * \a dev rather than top level device of the site. This interface allows
+ * objects of different "stacking" to be created within the same site.
  */
-static struct lu_object *lu_object_find_try(const struct lu_env *env,
-					    struct lu_device *dev,
-					    const struct lu_fid *f,
-					    const struct lu_object_conf *conf,
-					    wait_queue_entry_t *waiter)
+struct lu_object *lu_object_find_at(const struct lu_env *env,
+				    struct lu_device *dev,
+				    const struct lu_fid *f,
+				    const struct lu_object_conf *conf)
 {
 	struct lu_object      *o;
 	struct lu_object      *shadow;
@@ -725,17 +735,16 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
 	 * It is unnecessary to perform lookup-alloc-lookup-insert, instead,
 	 * just alloc and insert directly.
 	 *
-	 * If dying object is found during index search, add @waiter to the
-	 * site wait-queue and return ERR_PTR(-EAGAIN).
 	 */
 	if (conf && conf->loc_flags & LOC_F_NEW)
 		return lu_object_new(env, dev, f, conf);
 
 	s  = dev->ld_site;
 	hs = s->ls_obj_hash;
-	cfs_hash_bd_get_and_lock(hs, (void *)f, &bd, 1);
-	o = htable_lookup(s, &bd, f, waiter, &version);
-	cfs_hash_bd_unlock(hs, &bd, 1);
+	cfs_hash_bd_get_and_lock(hs, (void *)f, &bd, 0);
+	o = htable_lookup(s, &bd, f, &version);
+	cfs_hash_bd_unlock(hs, &bd, 0);
+
 	if (!IS_ERR(o) || PTR_ERR(o) != -ENOENT)
 		return o;
 
@@ -751,7 +760,7 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
 
 	cfs_hash_bd_lock(hs, &bd, 1);
 
-	shadow = htable_lookup(s, &bd, f, waiter, &version);
+	shadow = htable_lookup(s, &bd, f, &version);
 	if (likely(PTR_ERR(shadow) == -ENOENT)) {
 		cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
 		cfs_hash_bd_unlock(hs, &bd, 1);
@@ -766,34 +775,6 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
 	lu_object_free(env, o);
 	return shadow;
 }
-
-/**
- * Much like lu_object_find(), but top level device of object is specifically
- * \a dev rather than top level device of the site. This interface allows
- * objects of different "stacking" to be created within the same site.
- */
-struct lu_object *lu_object_find_at(const struct lu_env *env,
-				    struct lu_device *dev,
-				    const struct lu_fid *f,
-				    const struct lu_object_conf *conf)
-{
-	wait_queue_head_t	*wq;
-	struct lu_object	*obj;
-	wait_queue_entry_t	   wait;
-
-	while (1) {
-		obj = lu_object_find_try(env, dev, f, conf, &wait);
-		if (obj != ERR_PTR(-EAGAIN))
-			return obj;
-		/*
-		 * lu_object_find_try() already added waiter into the
-		 * wait queue.
-		 */
-		schedule();
-		wq = lu_site_wq_from_fid(dev->ld_site, (void *)f);
-		remove_wait_queue(wq, &wait);
-	}
-}
 EXPORT_SYMBOL(lu_object_find_at);
 
 /**

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 01/10] staging: lustre: ldlm: store name directly in namespace.
  2018-05-01  3:52 ` [PATCH 01/10] staging: lustre: ldlm: store name directly in namespace NeilBrown
@ 2018-05-01  4:04   ` Dilger, Andreas
  2018-05-02 18:11   ` James Simmons
  1 sibling, 0 replies; 28+ messages in thread
From: Dilger, Andreas @ 2018-05-01  4:04 UTC (permalink / raw)
  To: NeilBrown
  Cc: Drokin, Oleg, Greg Kroah-Hartman, James Simmons,
	Linux Kernel Mailing List, Lustre Development List

On Apr 30, 2018, at 21:52, NeilBrown <neilb@suse.com> wrote:
> 
> Rather than storing the name of a namespace in the
> hash table, store it directly in the namespace.
> This will allow the hashtable to be changed to use
> rhashtable.
> 
> Signed-off-by: NeilBrown <neilb@suse.com>

Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>

> ---
> drivers/staging/lustre/lustre/include/lustre_dlm.h |    5 ++++-
> drivers/staging/lustre/lustre/ldlm/ldlm_resource.c |    5 +++++
> 2 files changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm.h b/drivers/staging/lustre/lustre/include/lustre_dlm.h
> index d668d86423a4..b3532adac31c 100644
> --- a/drivers/staging/lustre/lustre/include/lustre_dlm.h
> +++ b/drivers/staging/lustre/lustre/include/lustre_dlm.h
> @@ -362,6 +362,9 @@ struct ldlm_namespace {
> 	/** Flag indicating if namespace is on client instead of server */
> 	enum ldlm_side		ns_client;
> 
> +	/** name of this namespace */
> +	char			*ns_name;
> +
> 	/** Resource hash table for namespace. */
> 	struct cfs_hash		*ns_rs_hash;
> 
> @@ -878,7 +881,7 @@ static inline bool ldlm_has_layout(struct ldlm_lock *lock)
> static inline char *
> ldlm_ns_name(struct ldlm_namespace *ns)
> {
> -	return ns->ns_rs_hash->hs_name;
> +	return ns->ns_name;
> }
> 
> static inline struct ldlm_namespace *
> diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
> index 6c615b6e9bdc..43bbc5fd94cc 100644
> --- a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
> +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
> @@ -688,6 +688,9 @@ struct ldlm_namespace *ldlm_namespace_new(struct obd_device *obd, char *name,
> 	ns->ns_obd      = obd;
> 	ns->ns_appetite = apt;
> 	ns->ns_client   = client;
> +	ns->ns_name     = kstrdup(name, GFP_KERNEL);
> +	if (!ns->ns_name)
> +		goto out_hash;
> 
> 	INIT_LIST_HEAD(&ns->ns_list_chain);
> 	INIT_LIST_HEAD(&ns->ns_unused_list);
> @@ -730,6 +733,7 @@ struct ldlm_namespace *ldlm_namespace_new(struct obd_device *obd, char *name,
> 	ldlm_namespace_sysfs_unregister(ns);
> 	ldlm_namespace_cleanup(ns, 0);
> out_hash:
> +	kfree(ns->ns_name);
> 	cfs_hash_putref(ns->ns_rs_hash);
> out_ns:
> 	kfree(ns);
> @@ -993,6 +997,7 @@ void ldlm_namespace_free_post(struct ldlm_namespace *ns)
> 	ldlm_namespace_debugfs_unregister(ns);
> 	ldlm_namespace_sysfs_unregister(ns);
> 	cfs_hash_putref(ns->ns_rs_hash);
> +	kfree(ns->ns_name);
> 	/* Namespace \a ns should be not on list at this time, otherwise
> 	 * this will cause issues related to using freed \a ns in poold
> 	 * thread.
> 
> 

Cheers, Andreas
--
Andreas Dilger
Lustre Principal Architect
Intel Corporation

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [lustre-devel] [PATCH 02/10] staging: lustre: make struct lu_site_bkt_data private
  2018-05-01  3:52 ` [PATCH 02/10] staging: lustre: make struct lu_site_bkt_data private NeilBrown
@ 2018-05-01  4:10   ` Dilger, Andreas
  2018-05-02  3:02   ` James Simmons
  1 sibling, 0 replies; 28+ messages in thread
From: Dilger, Andreas @ 2018-05-01  4:10 UTC (permalink / raw)
  To: NeilBrown
  Cc: Drokin, Oleg, Greg Kroah-Hartman, James Simmons,
	Linux Kernel Mailing List, Lustre Development List

On Apr 30, 2018, at 21:52, NeilBrown <neilb@suse.com> wrote:
> 
> This data structure only needs to be public so that
> various modules can access a wait queue to wait for object
> destruction.
> If we provide a function to get the wait queue, rather than the
> whole bucket, the structure can be made private.
> 
> Signed-off-by: NeilBrown <neilb@suse.com>

Nice cleanup.

Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>

> ---
> drivers/staging/lustre/lustre/include/lu_object.h  |   36 +-------------
> drivers/staging/lustre/lustre/llite/lcommon_cl.c   |    8 ++-
> drivers/staging/lustre/lustre/lov/lov_object.c     |    8 ++-
> drivers/staging/lustre/lustre/obdclass/lu_object.c |   50 +++++++++++++++++---
> 4 files changed, 54 insertions(+), 48 deletions(-)
> 
> diff --git a/drivers/staging/lustre/lustre/include/lu_object.h b/drivers/staging/lustre/lustre/include/lu_object.h
> index c3b0ed518819..f29bbca5af65 100644
> --- a/drivers/staging/lustre/lustre/include/lu_object.h
> +++ b/drivers/staging/lustre/lustre/include/lu_object.h
> @@ -549,31 +549,7 @@ struct lu_object_header {
> };
> 
> struct fld;
> -
> -struct lu_site_bkt_data {
> -	/**
> -	 * number of object in this bucket on the lsb_lru list.
> -	 */
> -	long			lsb_lru_len;
> -	/**
> -	 * LRU list, updated on each access to object. Protected by
> -	 * bucket lock of lu_site::ls_obj_hash.
> -	 *
> -	 * "Cold" end of LRU is lu_site::ls_lru.next. Accessed object are
> -	 * moved to the lu_site::ls_lru.prev (this is due to the non-existence
> -	 * of list_for_each_entry_safe_reverse()).
> -	 */
> -	struct list_head		lsb_lru;
> -	/**
> -	 * Wait-queue signaled when an object in this site is ultimately
> -	 * destroyed (lu_object_free()). It is used by lu_object_find() to
> -	 * wait before re-trying when object in the process of destruction is
> -	 * found in the hash table.
> -	 *
> -	 * \see htable_lookup().
> -	 */
> -	wait_queue_head_t	       lsb_marche_funebre;
> -};
> +struct lu_site_bkt_data;
> 
> enum {
> 	LU_SS_CREATED	 = 0,
> @@ -642,14 +618,8 @@ struct lu_site {
> 	struct percpu_counter	 ls_lru_len_counter;
> };
> 
> -static inline struct lu_site_bkt_data *
> -lu_site_bkt_from_fid(struct lu_site *site, struct lu_fid *fid)
> -{
> -	struct cfs_hash_bd bd;
> -
> -	cfs_hash_bd_get(site->ls_obj_hash, fid, &bd);
> -	return cfs_hash_bd_extra_get(site->ls_obj_hash, &bd);
> -}
> +wait_queue_head_t *
> +lu_site_wq_from_fid(struct lu_site *site, struct lu_fid *fid);
> 
> static inline struct seq_server_site *lu_site2seq(const struct lu_site *s)
> {
> diff --git a/drivers/staging/lustre/lustre/llite/lcommon_cl.c b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
> index df5c0c0ae703..d5b42fb1d601 100644
> --- a/drivers/staging/lustre/lustre/llite/lcommon_cl.c
> +++ b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
> @@ -211,12 +211,12 @@ static void cl_object_put_last(struct lu_env *env, struct cl_object *obj)
> 
> 	if (unlikely(atomic_read(&header->loh_ref) != 1)) {
> 		struct lu_site *site = obj->co_lu.lo_dev->ld_site;
> -		struct lu_site_bkt_data *bkt;
> +		wait_queue_head_t *wq;
> 
> -		bkt = lu_site_bkt_from_fid(site, &header->loh_fid);
> +		wq = lu_site_wq_from_fid(site, &header->loh_fid);
> 
> 		init_waitqueue_entry(&waiter, current);
> -		add_wait_queue(&bkt->lsb_marche_funebre, &waiter);
> +		add_wait_queue(wq, &waiter);
> 
> 		while (1) {
> 			set_current_state(TASK_UNINTERRUPTIBLE);
> @@ -226,7 +226,7 @@ static void cl_object_put_last(struct lu_env *env, struct cl_object *obj)
> 		}
> 
> 		set_current_state(TASK_RUNNING);
> -		remove_wait_queue(&bkt->lsb_marche_funebre, &waiter);
> +		remove_wait_queue(wq, &waiter);
> 	}
> 
> 	cl_object_put(env, obj);
> diff --git a/drivers/staging/lustre/lustre/lov/lov_object.c b/drivers/staging/lustre/lustre/lov/lov_object.c
> index f7c69680cb7d..adc90f310fd7 100644
> --- a/drivers/staging/lustre/lustre/lov/lov_object.c
> +++ b/drivers/staging/lustre/lustre/lov/lov_object.c
> @@ -370,7 +370,7 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
> 	struct cl_object	*sub;
> 	struct lov_layout_raid0 *r0;
> 	struct lu_site	  *site;
> -	struct lu_site_bkt_data *bkt;
> +	wait_queue_head_t *wq;
> 	wait_queue_entry_t	  *waiter;
> 
> 	r0  = &lov->u.raid0;
> @@ -378,7 +378,7 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
> 
> 	sub  = lovsub2cl(los);
> 	site = sub->co_lu.lo_dev->ld_site;
> -	bkt  = lu_site_bkt_from_fid(site, &sub->co_lu.lo_header->loh_fid);
> +	wq   = lu_site_wq_from_fid(site, &sub->co_lu.lo_header->loh_fid);
> 
> 	cl_object_kill(env, sub);
> 	/* release a reference to the sub-object and ... */
> @@ -391,7 +391,7 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
> 	if (r0->lo_sub[idx] == los) {
> 		waiter = &lov_env_info(env)->lti_waiter;
> 		init_waitqueue_entry(waiter, current);
> -		add_wait_queue(&bkt->lsb_marche_funebre, waiter);
> +		add_wait_queue(wq, waiter);
> 		set_current_state(TASK_UNINTERRUPTIBLE);
> 		while (1) {
> 			/* this wait-queue is signaled at the end of
> @@ -408,7 +408,7 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
> 				break;
> 			}
> 		}
> -		remove_wait_queue(&bkt->lsb_marche_funebre, waiter);
> +		remove_wait_queue(wq, waiter);
> 	}
> 	LASSERT(!r0->lo_sub[idx]);
> }
> diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
> index 3de7dc0497c4..2a8a25d6edb5 100644
> --- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
> +++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
> @@ -56,6 +56,31 @@
> #include <lu_ref.h>
> #include <linux/list.h>
> 
> +struct lu_site_bkt_data {
> +	/**
> +	 * number of object in this bucket on the lsb_lru list.
> +	 */
> +	long			lsb_lru_len;
> +	/**
> +	 * LRU list, updated on each access to object. Protected by
> +	 * bucket lock of lu_site::ls_obj_hash.
> +	 *
> +	 * "Cold" end of LRU is lu_site::ls_lru.next. Accessed object are
> +	 * moved to the lu_site::ls_lru.prev (this is due to the non-existence
> +	 * of list_for_each_entry_safe_reverse()).
> +	 */
> +	struct list_head		lsb_lru;
> +	/**
> +	 * Wait-queue signaled when an object in this site is ultimately
> +	 * destroyed (lu_object_free()). It is used by lu_object_find() to
> +	 * wait before re-trying when object in the process of destruction is
> +	 * found in the hash table.
> +	 *
> +	 * \see htable_lookup().
> +	 */
> +	wait_queue_head_t	       lsb_marche_funebre;
> +};
> +
> enum {
> 	LU_CACHE_PERCENT_MAX	 = 50,
> 	LU_CACHE_PERCENT_DEFAULT = 20
> @@ -88,6 +113,17 @@ MODULE_PARM_DESC(lu_cache_nr, "Maximum number of objects in lu_object cache");
> static void lu_object_free(const struct lu_env *env, struct lu_object *o);
> static __u32 ls_stats_read(struct lprocfs_stats *stats, int idx);
> 
> +wait_queue_head_t *
> +lu_site_wq_from_fid(struct lu_site *site, struct lu_fid *fid)
> +{
> +	struct cfs_hash_bd bd;
> +	struct lu_site_bkt_data *bkt;
> +
> +	cfs_hash_bd_get(site->ls_obj_hash, fid, &bd);
> +	bkt = cfs_hash_bd_extra_get(site->ls_obj_hash, &bd);
> +	return &bkt->lsb_marche_funebre;
> +}
> +
> /**
>  * Decrease reference counter on object. If last reference is freed, return
>  * object to the cache, unless lu_object_is_dying(o) holds. In the latter
> @@ -288,7 +324,7 @@ static struct lu_object *lu_object_alloc(const struct lu_env *env,
>  */
> static void lu_object_free(const struct lu_env *env, struct lu_object *o)
> {
> -	struct lu_site_bkt_data *bkt;
> +	wait_queue_head_t *wq;
> 	struct lu_site	  *site;
> 	struct lu_object	*scan;
> 	struct list_head	      *layers;
> @@ -296,7 +332,7 @@ static void lu_object_free(const struct lu_env *env, struct lu_object *o)
> 
> 	site   = o->lo_dev->ld_site;
> 	layers = &o->lo_header->loh_layers;
> -	bkt    = lu_site_bkt_from_fid(site, &o->lo_header->loh_fid);
> +	wq     = lu_site_wq_from_fid(site, &o->lo_header->loh_fid);
> 	/*
> 	 * First call ->loo_object_delete() method to release all resources.
> 	 */
> @@ -324,8 +360,8 @@ static void lu_object_free(const struct lu_env *env, struct lu_object *o)
> 		o->lo_ops->loo_object_free(env, o);
> 	}
> 
> -	if (waitqueue_active(&bkt->lsb_marche_funebre))
> -		wake_up_all(&bkt->lsb_marche_funebre);
> +	if (waitqueue_active(wq))
> +		wake_up_all(wq);
> }
> 
> /**
> @@ -749,7 +785,7 @@ struct lu_object *lu_object_find_at(const struct lu_env *env,
> 				    const struct lu_fid *f,
> 				    const struct lu_object_conf *conf)
> {
> -	struct lu_site_bkt_data *bkt;
> +	wait_queue_head_t	*wq;
> 	struct lu_object	*obj;
> 	wait_queue_entry_t	   wait;
> 
> @@ -762,8 +798,8 @@ struct lu_object *lu_object_find_at(const struct lu_env *env,
> 		 * wait queue.
> 		 */
> 		schedule();
> -		bkt = lu_site_bkt_from_fid(dev->ld_site, (void *)f);
> -		remove_wait_queue(&bkt->lsb_marche_funebre, &wait);
> +		wq = lu_site_wq_from_fid(dev->ld_site, (void *)f);
> +		remove_wait_queue(wq, &wait);
> 	}
> }
> EXPORT_SYMBOL(lu_object_find_at);
> 
> 
> _______________________________________________
> lustre-devel mailing list
> lustre-devel@lists.lustre.org
> http://lists.lustre.org/listinfo.cgi/lustre-devel-lustre.org

Cheers, Andreas
--
Andreas Dilger
Lustre Principal Architect
Intel Corporation

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 03/10] staging: lustre: lu_object: discard extra lru count.
  2018-05-01  3:52 ` [PATCH 03/10] staging: lustre: lu_object: discard extra lru count NeilBrown
@ 2018-05-01  4:19   ` Dilger, Andreas
  2018-05-04  0:08     ` NeilBrown
  0 siblings, 1 reply; 28+ messages in thread
From: Dilger, Andreas @ 2018-05-01  4:19 UTC (permalink / raw)
  To: NeilBrown
  Cc: Drokin, Oleg, Greg Kroah-Hartman, James Simmons,
	Linux Kernel Mailing List, Lustre Development List

On Apr 30, 2018, at 21:52, NeilBrown <neilb@suse.com> wrote:
> 
> lu_object maintains 2 lru counts.
> One is a per-bucket lsb_lru_len.
> The other is the per-cpu ls_lru_len_counter.
> 
> The only times the per-bucket counters are used are:
> - a debug message when an object is added
> - in lu_site_stats_get when all the counters are combined.
> 
> The debug message is not essential, and the per-cpu counter
> can be used to get the combined total.
> 
> So discard the per-bucket lsb_lru_len.
> 
> Signed-off-by: NeilBrown <neilb@suse.com>

Looks reasonable, though it would also be possible to fix the percpu
functions rather than adding a workaround in this code.

Reviewed-by: Andreas Dilger <andreas.dilger@dilger.ca>

> ---
> drivers/staging/lustre/lustre/obdclass/lu_object.c |   24 ++++++++------------
> 1 file changed, 9 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
> index 2a8a25d6edb5..2bf089817157 100644
> --- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
> +++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
> @@ -57,10 +57,6 @@
> #include <linux/list.h>
> 
> struct lu_site_bkt_data {
> -	/**
> -	 * number of object in this bucket on the lsb_lru list.
> -	 */
> -	long			lsb_lru_len;
> 	/**
> 	 * LRU list, updated on each access to object. Protected by
> 	 * bucket lock of lu_site::ls_obj_hash.
> @@ -187,10 +183,9 @@ void lu_object_put(const struct lu_env *env, struct lu_object *o)
> 	if (!lu_object_is_dying(top)) {
> 		LASSERT(list_empty(&top->loh_lru));
> 		list_add_tail(&top->loh_lru, &bkt->lsb_lru);
> -		bkt->lsb_lru_len++;
> 		percpu_counter_inc(&site->ls_lru_len_counter);
> -		CDEBUG(D_INODE, "Add %p to site lru. hash: %p, bkt: %p, lru_len: %ld\n",
> -		       o, site->ls_obj_hash, bkt, bkt->lsb_lru_len);
> +		CDEBUG(D_INODE, "Add %p to site lru. hash: %p, bkt: %p\n",
> +		       o, site->ls_obj_hash, bkt);
> 		cfs_hash_bd_unlock(site->ls_obj_hash, &bd, 1);
> 		return;
> 	}
> @@ -238,7 +233,6 @@ void lu_object_unhash(const struct lu_env *env, struct lu_object *o)
> 
> 			list_del_init(&top->loh_lru);
> 			bkt = cfs_hash_bd_extra_get(obj_hash, &bd);
> -			bkt->lsb_lru_len--;
> 			percpu_counter_dec(&site->ls_lru_len_counter);
> 		}
> 		cfs_hash_bd_del_locked(obj_hash, &bd, &top->loh_hash);
> @@ -422,7 +416,6 @@ int lu_site_purge_objects(const struct lu_env *env, struct lu_site *s,
> 			cfs_hash_bd_del_locked(s->ls_obj_hash,
> 					       &bd2, &h->loh_hash);
> 			list_move(&h->loh_lru, &dispose);
> -			bkt->lsb_lru_len--;
> 			percpu_counter_dec(&s->ls_lru_len_counter);
> 			if (did_sth == 0)
> 				did_sth = 1;
> @@ -621,7 +614,6 @@ static struct lu_object *htable_lookup(struct lu_site *s,
> 		lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_HIT);
> 		if (!list_empty(&h->loh_lru)) {
> 			list_del_init(&h->loh_lru);
> -			bkt->lsb_lru_len--;
> 			percpu_counter_dec(&s->ls_lru_len_counter);
> 		}
> 		return lu_object_top(h);
> @@ -1834,19 +1826,21 @@ struct lu_site_stats {
> 	unsigned int	lss_busy;
> };
> 
> -static void lu_site_stats_get(struct cfs_hash *hs,
> +static void lu_site_stats_get(const struct lu_site *s,
> 			      struct lu_site_stats *stats, int populated)
> {
> +	struct cfs_hash *hs = s->ls_obj_hash;
> 	struct cfs_hash_bd bd;
> 	unsigned int i;
> +	/* percpu_counter_read_positive() won't accept a const pointer */
> +	struct lu_site *s2 = (struct lu_site *)s;

It would seem worthwhile to change the percpu_counter_read_positive() and
percpu_counter_read() arguments to be "const struct percpu_counter *fbc",
rather than doing this cast here.  I can't see any reason that would be bad,
since both implementations just access fbc->count, and do not modify anything.

> +	stats->lss_busy += cfs_hash_size_get(hs) -
> +		percpu_counter_read_positive(&s2->ls_lru_len_counter);
> 	cfs_hash_for_each_bucket(hs, &bd, i) {
> -		struct lu_site_bkt_data *bkt = cfs_hash_bd_extra_get(hs, &bd);
> 		struct hlist_head	*hhead;
> 
> 		cfs_hash_bd_lock(hs, &bd, 1);
> -		stats->lss_busy  +=
> -			cfs_hash_bd_count_get(&bd) - bkt->lsb_lru_len;
> 		stats->lss_total += cfs_hash_bd_count_get(&bd);
> 		stats->lss_max_search = max((int)stats->lss_max_search,
> 					    cfs_hash_bd_depmax_get(&bd));
> @@ -2039,7 +2033,7 @@ int lu_site_stats_print(const struct lu_site *s, struct seq_file *m)
> 	struct lu_site_stats stats;
> 
> 	memset(&stats, 0, sizeof(stats));
> -	lu_site_stats_get(s->ls_obj_hash, &stats, 1);
> +	lu_site_stats_get(s, &stats, 1);
> 
> 	seq_printf(m, "%d/%d %d/%ld %d %d %d %d %d %d %d\n",
> 		   stats.lss_busy,
> 
> 

Cheers, Andreas
--
Andreas Dilger
Lustre Principal Architect
Intel Corporation

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [lustre-devel] [PATCH 04/10] staging: lustre: lu_object: move retry logic inside htable_lookup
  2018-05-01  3:52 ` [PATCH 04/10] staging: lustre: lu_object: move retry logic inside htable_lookup NeilBrown
@ 2018-05-01  8:22   ` Dilger, Andreas
  2018-05-02 18:21     ` James Simmons
  2018-05-04  1:30     ` NeilBrown
  0 siblings, 2 replies; 28+ messages in thread
From: Dilger, Andreas @ 2018-05-01  8:22 UTC (permalink / raw)
  To: NeilBrown
  Cc: Drokin, Oleg, Greg Kroah-Hartman, James Simmons,
	Linux Kernel Mailing List, Lustre Development List

On Apr 30, 2018, at 21:52, NeilBrown <neilb@suse.com> wrote:
> 
> The current retry logic, to wait when a 'dying' object is found,
> spans multiple functions.  The process is attached to a waitqueue
> and set TASK_UNINTERRUPTIBLE in htable_lookup, and this status
> is passed back through lu_object_find_try() to lu_object_find_at()
> where schedule() is called and the process is removed from the queue.
> 
> This can be simplified by moving all the logic (including
> hashtable locking) inside htable_lookup(), which now never returns
> EAGAIN.
> 
> Note that htable_lookup() is called with the hash bucket lock
> held, and will drop and retake it if it needs to schedule.
> 
> I made this a 'goto' loop rather than a 'while(1)' loop as the
> diff is easier to read.
> 
> Signed-off-by: NeilBrown <neilb@suse.com>
> ---
> drivers/staging/lustre/lustre/obdclass/lu_object.c |   73 +++++++-------------
> 1 file changed, 27 insertions(+), 46 deletions(-)
> 
> diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
> index 2bf089817157..93daa52e2535 100644
> --- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
> +++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
> @@ -586,16 +586,21 @@ EXPORT_SYMBOL(lu_object_print);
> static struct lu_object *htable_lookup(struct lu_site *s,

It's probably a good idea to add a comment for this function that it may
drop and re-acquire the hash bucket lock internally.

> 				       struct cfs_hash_bd *bd,
> 				       const struct lu_fid *f,
> -				       wait_queue_entry_t *waiter,
> 				       __u64 *version)
> {
> +	struct cfs_hash		*hs = s->ls_obj_hash;
> 	struct lu_site_bkt_data *bkt;
> 	struct lu_object_header *h;
> 	struct hlist_node	*hnode;
> -	__u64  ver = cfs_hash_bd_version_get(bd);
> +	__u64 ver;
> +	wait_queue_entry_t waiter;
> 
> -	if (*version == ver)
> +retry:
> +	ver = cfs_hash_bd_version_get(bd);
> +
> +	if (*version == ver) {
> 		return ERR_PTR(-ENOENT);
> +	}

(style) we don't need the {} around a single-line if statement

> 	*version = ver;
> 	bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, bd);
> @@ -625,11 +630,15 @@ static struct lu_object *htable_lookup(struct lu_site *s,
> 	 * drained), and moreover, lookup has to wait until object is freed.
> 	 */
> 
> -	init_waitqueue_entry(waiter, current);
> -	add_wait_queue(&bkt->lsb_marche_funebre, waiter);
> +	init_waitqueue_entry(&waiter, current);
> +	add_wait_queue(&bkt->lsb_marche_funebre, &waiter);
> 	set_current_state(TASK_UNINTERRUPTIBLE);
> 	lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_DEATH_RACE);
> -	return ERR_PTR(-EAGAIN);
> +	cfs_hash_bd_unlock(hs, bd, 1);

This looks like it isn't unlocking and locking the hash bucket in the same
manner that it was done in the caller.  Here excl = 1, but in the caller
you changed it to excl = 0?

> +	schedule();
> +	remove_wait_queue(&bkt->lsb_marche_funebre, &waiter);

Is it worthwhile to use your new helper function here to get the wq from "s"?

> +	cfs_hash_bd_lock(hs, bd, 1);
> +	goto retry;
> }
> 
> /**
> @@ -693,13 +702,14 @@ static struct lu_object *lu_object_new(const struct lu_env *env,
> }
> 
> /**
> - * Core logic of lu_object_find*() functions.
> + * Much like lu_object_find(), but top level device of object is specifically
> + * \a dev rather than top level device of the site. This interface allows
> + * objects of different "stacking" to be created within the same site.
>  */
> -static struct lu_object *lu_object_find_try(const struct lu_env *env,
> -					    struct lu_device *dev,
> -					    const struct lu_fid *f,
> -					    const struct lu_object_conf *conf,
> -					    wait_queue_entry_t *waiter)
> +struct lu_object *lu_object_find_at(const struct lu_env *env,
> +				    struct lu_device *dev,
> +				    const struct lu_fid *f,
> +				    const struct lu_object_conf *conf)
> {
> 	struct lu_object      *o;
> 	struct lu_object      *shadow;
> @@ -725,17 +735,16 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
> 	 * It is unnecessary to perform lookup-alloc-lookup-insert, instead,
> 	 * just alloc and insert directly.
> 	 *
> -	 * If dying object is found during index search, add @waiter to the
> -	 * site wait-queue and return ERR_PTR(-EAGAIN).
> 	 */
> 	if (conf && conf->loc_flags & LOC_F_NEW)
> 		return lu_object_new(env, dev, f, conf);
> 
> 	s  = dev->ld_site;
> 	hs = s->ls_obj_hash;
> -	cfs_hash_bd_get_and_lock(hs, (void *)f, &bd, 1);
> -	o = htable_lookup(s, &bd, f, waiter, &version);
> -	cfs_hash_bd_unlock(hs, &bd, 1);
> +	cfs_hash_bd_get_and_lock(hs, (void *)f, &bd, 0);
> +	o = htable_lookup(s, &bd, f, &version);
> +	cfs_hash_bd_unlock(hs, &bd, 0);

Here you changed the locking to a non-exclusive (read) lock instead of an
exclusive (write) lock?  Why.

> +
> 	if (!IS_ERR(o) || PTR_ERR(o) != -ENOENT)
> 		return o;
> 
> @@ -751,7 +760,7 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
> 
> 	cfs_hash_bd_lock(hs, &bd, 1);
> 
> -	shadow = htable_lookup(s, &bd, f, waiter, &version);
> +	shadow = htable_lookup(s, &bd, f, &version);
> 	if (likely(PTR_ERR(shadow) == -ENOENT)) {
> 		cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
> 		cfs_hash_bd_unlock(hs, &bd, 1);
> @@ -766,34 +775,6 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
> 	lu_object_free(env, o);
> 	return shadow;
> }
> -
> -/**
> - * Much like lu_object_find(), but top level device of object is specifically
> - * \a dev rather than top level device of the site. This interface allows
> - * objects of different "stacking" to be created within the same site.
> - */
> -struct lu_object *lu_object_find_at(const struct lu_env *env,
> -				    struct lu_device *dev,
> -				    const struct lu_fid *f,
> -				    const struct lu_object_conf *conf)
> -{
> -	wait_queue_head_t	*wq;
> -	struct lu_object	*obj;
> -	wait_queue_entry_t	   wait;
> -
> -	while (1) {
> -		obj = lu_object_find_try(env, dev, f, conf, &wait);
> -		if (obj != ERR_PTR(-EAGAIN))
> -			return obj;
> -		/*
> -		 * lu_object_find_try() already added waiter into the
> -		 * wait queue.
> -		 */
> -		schedule();
> -		wq = lu_site_wq_from_fid(dev->ld_site, (void *)f);
> -		remove_wait_queue(wq, &wait);
> -	}
> -}
> EXPORT_SYMBOL(lu_object_find_at);
> 
> /**
> 
> 
> _______________________________________________
> lustre-devel mailing list
> lustre-devel@lists.lustre.org
> http://lists.lustre.org/listinfo.cgi/lustre-devel-lustre.org

Cheers, Andreas
--
Andreas Dilger
Lustre Principal Architect
Intel Corporation

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 02/10] staging: lustre: make struct lu_site_bkt_data private
  2018-05-01  3:52 ` [PATCH 02/10] staging: lustre: make struct lu_site_bkt_data private NeilBrown
  2018-05-01  4:10   ` [lustre-devel] " Dilger, Andreas
@ 2018-05-02  3:02   ` James Simmons
  2018-05-03 23:39     ` NeilBrown
  1 sibling, 1 reply; 28+ messages in thread
From: James Simmons @ 2018-05-02  3:02 UTC (permalink / raw)
  To: NeilBrown
  Cc: Oleg Drokin, Greg Kroah-Hartman, Andreas Dilger,
	Linux Kernel Mailing List, Lustre Development List


> This data structure only needs to be public so that
> various modules can access a wait queue to wait for object
> destruction.
> If we provide a function to get the wait queue, rather than the
> whole bucket, the structure can be made private.
> 
> Signed-off-by: NeilBrown <neilb@suse.com>
> ---
>  drivers/staging/lustre/lustre/include/lu_object.h  |   36 +-------------
>  drivers/staging/lustre/lustre/llite/lcommon_cl.c   |    8 ++-
>  drivers/staging/lustre/lustre/lov/lov_object.c     |    8 ++-
>  drivers/staging/lustre/lustre/obdclass/lu_object.c |   50 +++++++++++++++++---
>  4 files changed, 54 insertions(+), 48 deletions(-)
> 
> diff --git a/drivers/staging/lustre/lustre/include/lu_object.h b/drivers/staging/lustre/lustre/include/lu_object.h
> index c3b0ed518819..f29bbca5af65 100644
> --- a/drivers/staging/lustre/lustre/include/lu_object.h
> +++ b/drivers/staging/lustre/lustre/include/lu_object.h
> @@ -549,31 +549,7 @@ struct lu_object_header {
>  };
>  
>  struct fld;
> -
> -struct lu_site_bkt_data {
> -	/**
> -	 * number of object in this bucket on the lsb_lru list.
> -	 */
> -	long			lsb_lru_len;
> -	/**
> -	 * LRU list, updated on each access to object. Protected by
> -	 * bucket lock of lu_site::ls_obj_hash.
> -	 *
> -	 * "Cold" end of LRU is lu_site::ls_lru.next. Accessed object are
> -	 * moved to the lu_site::ls_lru.prev (this is due to the non-existence
> -	 * of list_for_each_entry_safe_reverse()).
> -	 */
> -	struct list_head		lsb_lru;
> -	/**
> -	 * Wait-queue signaled when an object in this site is ultimately
> -	 * destroyed (lu_object_free()). It is used by lu_object_find() to
> -	 * wait before re-trying when object in the process of destruction is
> -	 * found in the hash table.
> -	 *
> -	 * \see htable_lookup().
> -	 */
> -	wait_queue_head_t	       lsb_marche_funebre;
> -};
> +struct lu_site_bkt_data;
>  
>  enum {
>  	LU_SS_CREATED	 = 0,
> @@ -642,14 +618,8 @@ struct lu_site {
>  	struct percpu_counter	 ls_lru_len_counter;
>  };
>  
> -static inline struct lu_site_bkt_data *
> -lu_site_bkt_from_fid(struct lu_site *site, struct lu_fid *fid)
> -{
> -	struct cfs_hash_bd bd;
> -
> -	cfs_hash_bd_get(site->ls_obj_hash, fid, &bd);
> -	return cfs_hash_bd_extra_get(site->ls_obj_hash, &bd);
> -}
> +wait_queue_head_t *
> +lu_site_wq_from_fid(struct lu_site *site, struct lu_fid *fid);
>  
>  static inline struct seq_server_site *lu_site2seq(const struct lu_site *s)
>  {
> diff --git a/drivers/staging/lustre/lustre/llite/lcommon_cl.c b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
> index df5c0c0ae703..d5b42fb1d601 100644
> --- a/drivers/staging/lustre/lustre/llite/lcommon_cl.c
> +++ b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
> @@ -211,12 +211,12 @@ static void cl_object_put_last(struct lu_env *env, struct cl_object *obj)
>  
>  	if (unlikely(atomic_read(&header->loh_ref) != 1)) {
>  		struct lu_site *site = obj->co_lu.lo_dev->ld_site;
> -		struct lu_site_bkt_data *bkt;
> +		wait_queue_head_t *wq;
>  
> -		bkt = lu_site_bkt_from_fid(site, &header->loh_fid);
> +		wq = lu_site_wq_from_fid(site, &header->loh_fid);
>  
>  		init_waitqueue_entry(&waiter, current);
> -		add_wait_queue(&bkt->lsb_marche_funebre, &waiter);
> +		add_wait_queue(wq, &waiter);
>  
>  		while (1) {
>  			set_current_state(TASK_UNINTERRUPTIBLE);
> @@ -226,7 +226,7 @@ static void cl_object_put_last(struct lu_env *env, struct cl_object *obj)
>  		}
>  
>  		set_current_state(TASK_RUNNING);
> -		remove_wait_queue(&bkt->lsb_marche_funebre, &waiter);
> +		remove_wait_queue(wq, &waiter);
>  	}
>  
>  	cl_object_put(env, obj);
> diff --git a/drivers/staging/lustre/lustre/lov/lov_object.c b/drivers/staging/lustre/lustre/lov/lov_object.c
> index f7c69680cb7d..adc90f310fd7 100644
> --- a/drivers/staging/lustre/lustre/lov/lov_object.c
> +++ b/drivers/staging/lustre/lustre/lov/lov_object.c
> @@ -370,7 +370,7 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
>  	struct cl_object	*sub;
>  	struct lov_layout_raid0 *r0;
>  	struct lu_site	  *site;
> -	struct lu_site_bkt_data *bkt;
> +	wait_queue_head_t *wq;
>  	wait_queue_entry_t	  *waiter;
>  
>  	r0  = &lov->u.raid0;
> @@ -378,7 +378,7 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
>  
>  	sub  = lovsub2cl(los);
>  	site = sub->co_lu.lo_dev->ld_site;
> -	bkt  = lu_site_bkt_from_fid(site, &sub->co_lu.lo_header->loh_fid);
> +	wq   = lu_site_wq_from_fid(site, &sub->co_lu.lo_header->loh_fid);
>  
>  	cl_object_kill(env, sub);
>  	/* release a reference to the sub-object and ... */
> @@ -391,7 +391,7 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
>  	if (r0->lo_sub[idx] == los) {
>  		waiter = &lov_env_info(env)->lti_waiter;
>  		init_waitqueue_entry(waiter, current);
> -		add_wait_queue(&bkt->lsb_marche_funebre, waiter);
> +		add_wait_queue(wq, waiter);
>  		set_current_state(TASK_UNINTERRUPTIBLE);
>  		while (1) {
>  			/* this wait-queue is signaled at the end of
> @@ -408,7 +408,7 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
>  				break;
>  			}
>  		}
> -		remove_wait_queue(&bkt->lsb_marche_funebre, waiter);
> +		remove_wait_queue(wq, waiter);
>  	}
>  	LASSERT(!r0->lo_sub[idx]);
>  }
> diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
> index 3de7dc0497c4..2a8a25d6edb5 100644
> --- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
> +++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
> @@ -56,6 +56,31 @@
>  #include <lu_ref.h>
>  #include <linux/list.h>
>  
> +struct lu_site_bkt_data {
> +	/**
> +	 * number of object in this bucket on the lsb_lru list.
> +	 */
> +	long			lsb_lru_len;
> +	/**
> +	 * LRU list, updated on each access to object. Protected by
> +	 * bucket lock of lu_site::ls_obj_hash.
> +	 *
> +	 * "Cold" end of LRU is lu_site::ls_lru.next. Accessed object are
> +	 * moved to the lu_site::ls_lru.prev (this is due to the non-existence
> +	 * of list_for_each_entry_safe_reverse()).
> +	 */
> +	struct list_head		lsb_lru;
> +	/**
> +	 * Wait-queue signaled when an object in this site is ultimately
> +	 * destroyed (lu_object_free()). It is used by lu_object_find() to
> +	 * wait before re-trying when object in the process of destruction is
> +	 * found in the hash table.
> +	 *
> +	 * \see htable_lookup().
> +	 */
> +	wait_queue_head_t	       lsb_marche_funebre;
> +};
> +
>  enum {
>  	LU_CACHE_PERCENT_MAX	 = 50,
>  	LU_CACHE_PERCENT_DEFAULT = 20
> @@ -88,6 +113,17 @@ MODULE_PARM_DESC(lu_cache_nr, "Maximum number of objects in lu_object cache");
>  static void lu_object_free(const struct lu_env *env, struct lu_object *o);
>  static __u32 ls_stats_read(struct lprocfs_stats *stats, int idx);
>  
> +wait_queue_head_t *
> +lu_site_wq_from_fid(struct lu_site *site, struct lu_fid *fid)
> +{
> +	struct cfs_hash_bd bd;
> +	struct lu_site_bkt_data *bkt;
> +
> +	cfs_hash_bd_get(site->ls_obj_hash, fid, &bd);
> +	bkt = cfs_hash_bd_extra_get(site->ls_obj_hash, &bd);
> +	return &bkt->lsb_marche_funebre;
> +}
> +

Nak. You forgot the EXPORT_SYMBOL for this. Once I added the export symbol
it seems to work so far in my testing.

>  /**
>   * Decrease reference counter on object. If last reference is freed, return
>   * object to the cache, unless lu_object_is_dying(o) holds. In the latter
> @@ -288,7 +324,7 @@ static struct lu_object *lu_object_alloc(const struct lu_env *env,
>   */
>  static void lu_object_free(const struct lu_env *env, struct lu_object *o)
>  {
> -	struct lu_site_bkt_data *bkt;
> +	wait_queue_head_t *wq;
>  	struct lu_site	  *site;
>  	struct lu_object	*scan;
>  	struct list_head	      *layers;
> @@ -296,7 +332,7 @@ static void lu_object_free(const struct lu_env *env, struct lu_object *o)
>  
>  	site   = o->lo_dev->ld_site;
>  	layers = &o->lo_header->loh_layers;
> -	bkt    = lu_site_bkt_from_fid(site, &o->lo_header->loh_fid);
> +	wq     = lu_site_wq_from_fid(site, &o->lo_header->loh_fid);
>  	/*
>  	 * First call ->loo_object_delete() method to release all resources.
>  	 */
> @@ -324,8 +360,8 @@ static void lu_object_free(const struct lu_env *env, struct lu_object *o)
>  		o->lo_ops->loo_object_free(env, o);
>  	}
>  
> -	if (waitqueue_active(&bkt->lsb_marche_funebre))
> -		wake_up_all(&bkt->lsb_marche_funebre);
> +	if (waitqueue_active(wq))
> +		wake_up_all(wq);
>  }
>  
>  /**
> @@ -749,7 +785,7 @@ struct lu_object *lu_object_find_at(const struct lu_env *env,
>  				    const struct lu_fid *f,
>  				    const struct lu_object_conf *conf)
>  {
> -	struct lu_site_bkt_data *bkt;
> +	wait_queue_head_t	*wq;
>  	struct lu_object	*obj;
>  	wait_queue_entry_t	   wait;
>  
> @@ -762,8 +798,8 @@ struct lu_object *lu_object_find_at(const struct lu_env *env,
>  		 * wait queue.
>  		 */
>  		schedule();
> -		bkt = lu_site_bkt_from_fid(dev->ld_site, (void *)f);
> -		remove_wait_queue(&bkt->lsb_marche_funebre, &wait);
> +		wq = lu_site_wq_from_fid(dev->ld_site, (void *)f);
> +		remove_wait_queue(wq, &wait);
>  	}
>  }
>  EXPORT_SYMBOL(lu_object_find_at);
> 
> 
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 10/10] staging: lustre: fix error deref in ll_splice_alias().
  2018-05-01  3:52 ` [PATCH 10/10] staging: lustre: fix error deref in ll_splice_alias() NeilBrown
@ 2018-05-02  3:05   ` James Simmons
  2018-05-04  0:34     ` NeilBrown
  0 siblings, 1 reply; 28+ messages in thread
From: James Simmons @ 2018-05-02  3:05 UTC (permalink / raw)
  To: NeilBrown
  Cc: Oleg Drokin, Greg Kroah-Hartman, Andreas Dilger,
	Linux Kernel Mailing List, Lustre Development List


> d_splice_alias() can return an ERR_PTR().
> If it does while debugging is enabled, the following
> CDEBUG() will dereference that error and crash.
> 
> So add appropriate checking, and provide a separate
> debug message for the error case.

Yeah!!! It fixed the issues. Thank you.

Reviewed-by: James Simmons <jsimmons@infradead.org>
 
> Reported-by: James Simmons <jsimmons@infradead.org>
> Fixes: e9d4f0b9f559 ("staging: lustre: llite: use d_splice_alias for directories.")
> Signed-off-by: NeilBrown <neilb@suse.com>
> ---
>  drivers/staging/lustre/lustre/llite/namei.c |    8 ++++++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c
> index 6c9ec462eb41..24a6873d86a2 100644
> --- a/drivers/staging/lustre/lustre/llite/namei.c
> +++ b/drivers/staging/lustre/lustre/llite/namei.c
> @@ -442,11 +442,15 @@ struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de)
>  	} else {
>  		struct dentry *new = d_splice_alias(inode, de);
>  
> +		if (IS_ERR(new))
> +			CDEBUG(D_DENTRY, "splice inode %p as %pd gives error %lu\n",
> +			       inode, de, PTR_ERR(new));
>  		if (new)
>  			de = new;
>  	}
> -	CDEBUG(D_DENTRY, "Add dentry %p inode %p refc %d flags %#x\n",
> -	       de, d_inode(de), d_count(de), de->d_flags);
> +	if (!IS_ERR(de))
> +		CDEBUG(D_DENTRY, "Add dentry %p inode %p refc %d flags %#x\n",
> +		       de, d_inode(de), d_count(de), de->d_flags);
>  	return de;
>  }
>  
> 
> 
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 01/10] staging: lustre: ldlm: store name directly in namespace.
  2018-05-01  3:52 ` [PATCH 01/10] staging: lustre: ldlm: store name directly in namespace NeilBrown
  2018-05-01  4:04   ` Dilger, Andreas
@ 2018-05-02 18:11   ` James Simmons
  1 sibling, 0 replies; 28+ messages in thread
From: James Simmons @ 2018-05-02 18:11 UTC (permalink / raw)
  To: NeilBrown
  Cc: Oleg Drokin, Greg Kroah-Hartman, Andreas Dilger,
	Linux Kernel Mailing List, Lustre Development List


> Rather than storing the name of a namespace in the
> hash table, store it directly in the namespace.
> This will allow the hashtable to be changed to use
> rhashtable.
> 
> Signed-off-by: NeilBrown <neilb@suse.com>

Reviewed-by: James Simmons <jsimmons@infradead.org>

> ---
>  drivers/staging/lustre/lustre/include/lustre_dlm.h |    5 ++++-
>  drivers/staging/lustre/lustre/ldlm/ldlm_resource.c |    5 +++++
>  2 files changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm.h b/drivers/staging/lustre/lustre/include/lustre_dlm.h
> index d668d86423a4..b3532adac31c 100644
> --- a/drivers/staging/lustre/lustre/include/lustre_dlm.h
> +++ b/drivers/staging/lustre/lustre/include/lustre_dlm.h
> @@ -362,6 +362,9 @@ struct ldlm_namespace {
>  	/** Flag indicating if namespace is on client instead of server */
>  	enum ldlm_side		ns_client;
>  
> +	/** name of this namespace */
> +	char			*ns_name;
> +
>  	/** Resource hash table for namespace. */
>  	struct cfs_hash		*ns_rs_hash;
>  
> @@ -878,7 +881,7 @@ static inline bool ldlm_has_layout(struct ldlm_lock *lock)
>  static inline char *
>  ldlm_ns_name(struct ldlm_namespace *ns)
>  {
> -	return ns->ns_rs_hash->hs_name;
> +	return ns->ns_name;
>  }
>  
>  static inline struct ldlm_namespace *
> diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
> index 6c615b6e9bdc..43bbc5fd94cc 100644
> --- a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
> +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
> @@ -688,6 +688,9 @@ struct ldlm_namespace *ldlm_namespace_new(struct obd_device *obd, char *name,
>  	ns->ns_obd      = obd;
>  	ns->ns_appetite = apt;
>  	ns->ns_client   = client;
> +	ns->ns_name     = kstrdup(name, GFP_KERNEL);
> +	if (!ns->ns_name)
> +		goto out_hash;
>  
>  	INIT_LIST_HEAD(&ns->ns_list_chain);
>  	INIT_LIST_HEAD(&ns->ns_unused_list);
> @@ -730,6 +733,7 @@ struct ldlm_namespace *ldlm_namespace_new(struct obd_device *obd, char *name,
>  	ldlm_namespace_sysfs_unregister(ns);
>  	ldlm_namespace_cleanup(ns, 0);
>  out_hash:
> +	kfree(ns->ns_name);
>  	cfs_hash_putref(ns->ns_rs_hash);
>  out_ns:
>  	kfree(ns);
> @@ -993,6 +997,7 @@ void ldlm_namespace_free_post(struct ldlm_namespace *ns)
>  	ldlm_namespace_debugfs_unregister(ns);
>  	ldlm_namespace_sysfs_unregister(ns);
>  	cfs_hash_putref(ns->ns_rs_hash);
> +	kfree(ns->ns_name);
>  	/* Namespace \a ns should be not on list at this time, otherwise
>  	 * this will cause issues related to using freed \a ns in poold
>  	 * thread.
> 
> 
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 08/10] staging: lustre: move misc-device registration closer to related code.
  2018-05-01  3:52 ` [PATCH 08/10] staging: lustre: move misc-device registration closer to related code NeilBrown
@ 2018-05-02 18:12   ` James Simmons
  0 siblings, 0 replies; 28+ messages in thread
From: James Simmons @ 2018-05-02 18:12 UTC (permalink / raw)
  To: NeilBrown
  Cc: Oleg Drokin, Greg Kroah-Hartman, Andreas Dilger,
	Linux Kernel Mailing List, Lustre Development List


> The ioctl handler for the misc device is in lnet/libcfs/module.c
> but it is registered in lnet/libcfs/linux/linux-module.c.
> 
> Keeping related code together makes maintenance easier, so move the
> code.
> 
> Signed-off-by: NeilBrown <neilb@suse.com>

Reviewed-by: James Simmons <jsimmons@infradead.org>

> ---
>  .../staging/lustre/include/linux/libcfs/libcfs.h   |    2 -
>  .../lustre/lnet/libcfs/linux/linux-module.c        |   28 ------------------
>  drivers/staging/lustre/lnet/libcfs/module.c        |   31 +++++++++++++++++++-
>  3 files changed, 30 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs.h b/drivers/staging/lustre/include/linux/libcfs/libcfs.h
> index 6e7754b2f296..9263e151451b 100644
> --- a/drivers/staging/lustre/include/linux/libcfs/libcfs.h
> +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs.h
> @@ -141,11 +141,9 @@ int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand);
>  int libcfs_ioctl_getdata(struct libcfs_ioctl_hdr **hdr_pp,
>  			 const struct libcfs_ioctl_hdr __user *uparam);
>  int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data);
> -int libcfs_ioctl(unsigned long cmd, void __user *arg);
>  
>  #define _LIBCFS_H
>  
> -extern struct miscdevice libcfs_dev;
>  /**
>   * The path of debug log dump upcall script.
>   */
> diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c
> index c8908e816c4c..954b681f9db7 100644
> --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c
> +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c
> @@ -166,31 +166,3 @@ int libcfs_ioctl_getdata(struct libcfs_ioctl_hdr **hdr_pp,
>  	kvfree(*hdr_pp);
>  	return err;
>  }
> -
> -static long
> -libcfs_psdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
> -{
> -	if (!capable(CAP_SYS_ADMIN))
> -		return -EACCES;
> -
> -	if (_IOC_TYPE(cmd) != IOC_LIBCFS_TYPE ||
> -	    _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR  ||
> -	    _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR) {
> -		CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n",
> -		       _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
> -		return -EINVAL;
> -	}
> -
> -	return libcfs_ioctl(cmd, (void __user *)arg);
> -}
> -
> -static const struct file_operations libcfs_fops = {
> -	.owner		= THIS_MODULE,
> -	.unlocked_ioctl	= libcfs_psdev_ioctl,
> -};
> -
> -struct miscdevice libcfs_dev = {
> -	.minor = MISC_DYNAMIC_MINOR,
> -	.name = "lnet",
> -	.fops = &libcfs_fops,
> -};
> diff --git a/drivers/staging/lustre/lnet/libcfs/module.c b/drivers/staging/lustre/lnet/libcfs/module.c
> index 4b9acd7bc5cf..3fb150a57f49 100644
> --- a/drivers/staging/lustre/lnet/libcfs/module.c
> +++ b/drivers/staging/lustre/lnet/libcfs/module.c
> @@ -95,7 +95,7 @@ int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand)
>  }
>  EXPORT_SYMBOL(libcfs_deregister_ioctl);
>  
> -int libcfs_ioctl(unsigned long cmd, void __user *uparam)
> +static int libcfs_ioctl(unsigned long cmd, void __user *uparam)
>  {
>  	struct libcfs_ioctl_data *data = NULL;
>  	struct libcfs_ioctl_hdr *hdr;
> @@ -161,6 +161,35 @@ int libcfs_ioctl(unsigned long cmd, void __user *uparam)
>  	return err;
>  }
>  
> +
> +static long
> +libcfs_psdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
> +{
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EACCES;
> +
> +	if (_IOC_TYPE(cmd) != IOC_LIBCFS_TYPE ||
> +	    _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR  ||
> +	    _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR) {
> +		CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n",
> +		       _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
> +		return -EINVAL;
> +	}
> +
> +	return libcfs_ioctl(cmd, (void __user *)arg);
> +}
> +
> +static const struct file_operations libcfs_fops = {
> +	.owner		= THIS_MODULE,
> +	.unlocked_ioctl	= libcfs_psdev_ioctl,
> +};
> +
> +struct miscdevice libcfs_dev = {
> +	.minor = MISC_DYNAMIC_MINOR,
> +	.name = "lnet",
> +	.fops = &libcfs_fops,
> +};
> +
>  int lprocfs_call_handler(void *data, int write, loff_t *ppos,
>  			 void __user *buffer, size_t *lenp,
>  			 int (*handler)(void *data, int write, loff_t pos,
> 
> 
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 09/10] staging: lustre: move remaining code from linux-module.c to module.c
  2018-05-01  3:52 ` [PATCH 09/10] staging: lustre: move remaining code from linux-module.c to module.c NeilBrown
@ 2018-05-02 18:13   ` James Simmons
  0 siblings, 0 replies; 28+ messages in thread
From: James Simmons @ 2018-05-02 18:13 UTC (permalink / raw)
  To: NeilBrown
  Cc: Oleg Drokin, Greg Kroah-Hartman, Andreas Dilger,
	Linux Kernel Mailing List, Lustre Development List


> There is no longer any need to keep this code separate,
> and now we can remove linux-module.c
> 
> Signed-off-by: NeilBrown <neilb@suse.com>

Reviewed-by: James Simmons <jsimmons@infradead.org>

> ---
>  .../staging/lustre/include/linux/libcfs/libcfs.h   |    4 
>  drivers/staging/lustre/lnet/libcfs/Makefile        |    1 
>  .../lustre/lnet/libcfs/linux/linux-module.c        |  168 --------------------
>  drivers/staging/lustre/lnet/libcfs/module.c        |  131 ++++++++++++++++
>  4 files changed, 131 insertions(+), 173 deletions(-)
>  delete mode 100644 drivers/staging/lustre/lnet/libcfs/linux/linux-module.c
> 
> diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs.h b/drivers/staging/lustre/include/linux/libcfs/libcfs.h
> index 9263e151451b..d420449b620e 100644
> --- a/drivers/staging/lustre/include/linux/libcfs/libcfs.h
> +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs.h
> @@ -138,10 +138,6 @@ struct libcfs_ioctl_handler {
>  int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand);
>  int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand);
>  
> -int libcfs_ioctl_getdata(struct libcfs_ioctl_hdr **hdr_pp,
> -			 const struct libcfs_ioctl_hdr __user *uparam);
> -int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data);
> -
>  #define _LIBCFS_H
>  
>  /**
> diff --git a/drivers/staging/lustre/lnet/libcfs/Makefile b/drivers/staging/lustre/lnet/libcfs/Makefile
> index e6fda27fdabd..e73515789a11 100644
> --- a/drivers/staging/lustre/lnet/libcfs/Makefile
> +++ b/drivers/staging/lustre/lnet/libcfs/Makefile
> @@ -5,7 +5,6 @@ subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
>  obj-$(CONFIG_LNET) += libcfs.o
>  
>  libcfs-linux-objs := linux-tracefile.o linux-debug.o
> -libcfs-linux-objs += linux-module.o
>  libcfs-linux-objs += linux-crypto.o
>  libcfs-linux-objs += linux-crypto-adler.o
>  
> diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c
> deleted file mode 100644
> index 954b681f9db7..000000000000
> --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c
> +++ /dev/null
> @@ -1,168 +0,0 @@
> -// SPDX-License-Identifier: GPL-2.0
> -/*
> - * GPL HEADER START
> - *
> - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License version 2 only,
> - * as published by the Free Software Foundation.
> - *
> - * This program is distributed in the hope that it will be useful, but
> - * WITHOUT ANY WARRANTY; without even the implied warranty of
> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> - * General Public License version 2 for more details (a copy is included
> - * in the LICENSE file that accompanied this code).
> - *
> - * You should have received a copy of the GNU General Public License
> - * version 2 along with this program; If not, see
> - * http://www.gnu.org/licenses/gpl-2.0.html
> - *
> - * GPL HEADER END
> - */
> -/*
> - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
> - * Use is subject to license terms.
> - *
> - * Copyright (c) 2012, Intel Corporation.
> - */
> -/*
> - * This file is part of Lustre, http://www.lustre.org/
> - * Lustre is a trademark of Sun Microsystems, Inc.
> - */
> -
> -#define DEBUG_SUBSYSTEM S_LNET
> -
> -#include <linux/miscdevice.h>
> -#include <linux/libcfs/libcfs.h>
> -
> -static inline size_t libcfs_ioctl_packlen(struct libcfs_ioctl_data *data)
> -{
> -	size_t len = sizeof(*data);
> -
> -	len += cfs_size_round(data->ioc_inllen1);
> -	len += cfs_size_round(data->ioc_inllen2);
> -	return len;
> -}
> -
> -static inline bool libcfs_ioctl_is_invalid(struct libcfs_ioctl_data *data)
> -{
> -	if (data->ioc_hdr.ioc_len > BIT(30)) {
> -		CERROR("LIBCFS ioctl: ioc_len larger than 1<<30\n");
> -		return true;
> -	}
> -	if (data->ioc_inllen1 > BIT(30)) {
> -		CERROR("LIBCFS ioctl: ioc_inllen1 larger than 1<<30\n");
> -		return true;
> -	}
> -	if (data->ioc_inllen2 > BIT(30)) {
> -		CERROR("LIBCFS ioctl: ioc_inllen2 larger than 1<<30\n");
> -		return true;
> -	}
> -	if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
> -		CERROR("LIBCFS ioctl: inlbuf1 pointer but 0 length\n");
> -		return true;
> -	}
> -	if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
> -		CERROR("LIBCFS ioctl: inlbuf2 pointer but 0 length\n");
> -		return true;
> -	}
> -	if (data->ioc_pbuf1 && !data->ioc_plen1) {
> -		CERROR("LIBCFS ioctl: pbuf1 pointer but 0 length\n");
> -		return true;
> -	}
> -	if (data->ioc_pbuf2 && !data->ioc_plen2) {
> -		CERROR("LIBCFS ioctl: pbuf2 pointer but 0 length\n");
> -		return true;
> -	}
> -	if (data->ioc_plen1 && !data->ioc_pbuf1) {
> -		CERROR("LIBCFS ioctl: plen1 nonzero but no pbuf1 pointer\n");
> -		return true;
> -	}
> -	if (data->ioc_plen2 && !data->ioc_pbuf2) {
> -		CERROR("LIBCFS ioctl: plen2 nonzero but no pbuf2 pointer\n");
> -		return true;
> -	}
> -	if ((u32)libcfs_ioctl_packlen(data) != data->ioc_hdr.ioc_len) {
> -		CERROR("LIBCFS ioctl: packlen != ioc_len\n");
> -		return true;
> -	}
> -	if (data->ioc_inllen1 &&
> -	    data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') {
> -		CERROR("LIBCFS ioctl: inlbuf1 not 0 terminated\n");
> -		return true;
> -	}
> -	if (data->ioc_inllen2 &&
> -	    data->ioc_bulk[cfs_size_round(data->ioc_inllen1) +
> -			   data->ioc_inllen2 - 1] != '\0') {
> -		CERROR("LIBCFS ioctl: inlbuf2 not 0 terminated\n");
> -		return true;
> -	}
> -	return false;
> -}
> -
> -int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data)
> -{
> -	if (libcfs_ioctl_is_invalid(data)) {
> -		CERROR("libcfs ioctl: parameter not correctly formatted\n");
> -		return -EINVAL;
> -	}
> -
> -	if (data->ioc_inllen1)
> -		data->ioc_inlbuf1 = &data->ioc_bulk[0];
> -
> -	if (data->ioc_inllen2)
> -		data->ioc_inlbuf2 = &data->ioc_bulk[0] +
> -			cfs_size_round(data->ioc_inllen1);
> -
> -	return 0;
> -}
> -
> -int libcfs_ioctl_getdata(struct libcfs_ioctl_hdr **hdr_pp,
> -			 const struct libcfs_ioctl_hdr __user *uhdr)
> -{
> -	struct libcfs_ioctl_hdr hdr;
> -	int err;
> -
> -	if (copy_from_user(&hdr, uhdr, sizeof(hdr)))
> -		return -EFAULT;
> -
> -	if (hdr.ioc_version != LIBCFS_IOCTL_VERSION &&
> -	    hdr.ioc_version != LIBCFS_IOCTL_VERSION2) {
> -		CERROR("libcfs ioctl: version mismatch expected %#x, got %#x\n",
> -		       LIBCFS_IOCTL_VERSION, hdr.ioc_version);
> -		return -EINVAL;
> -	}
> -
> -	if (hdr.ioc_len < sizeof(hdr)) {
> -		CERROR("libcfs ioctl: user buffer too small for ioctl\n");
> -		return -EINVAL;
> -	}
> -
> -	if (hdr.ioc_len > LIBCFS_IOC_DATA_MAX) {
> -		CERROR("libcfs ioctl: user buffer is too large %d/%d\n",
> -		       hdr.ioc_len, LIBCFS_IOC_DATA_MAX);
> -		return -EINVAL;
> -	}
> -
> -	*hdr_pp = kvmalloc(hdr.ioc_len, GFP_KERNEL);
> -	if (!*hdr_pp)
> -		return -ENOMEM;
> -
> -	if (copy_from_user(*hdr_pp, uhdr, hdr.ioc_len)) {
> -		err = -EFAULT;
> -		goto free;
> -	}
> -
> -	if ((*hdr_pp)->ioc_version != hdr.ioc_version ||
> -	    (*hdr_pp)->ioc_len != hdr.ioc_len) {
> -		err = -EINVAL;
> -		goto free;
> -	}
> -
> -	return 0;
> -
> -free:
> -	kvfree(*hdr_pp);
> -	return err;
> -}
> diff --git a/drivers/staging/lustre/lnet/libcfs/module.c b/drivers/staging/lustre/lnet/libcfs/module.c
> index 3fb150a57f49..4c4bf09a78dd 100644
> --- a/drivers/staging/lustre/lnet/libcfs/module.c
> +++ b/drivers/staging/lustre/lnet/libcfs/module.c
> @@ -95,6 +95,137 @@ int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand)
>  }
>  EXPORT_SYMBOL(libcfs_deregister_ioctl);
>  
> +static inline size_t libcfs_ioctl_packlen(struct libcfs_ioctl_data *data)
> +{
> +	size_t len = sizeof(*data);
> +
> +	len += cfs_size_round(data->ioc_inllen1);
> +	len += cfs_size_round(data->ioc_inllen2);
> +	return len;
> +}
> +
> +static inline bool libcfs_ioctl_is_invalid(struct libcfs_ioctl_data *data)
> +{
> +	if (data->ioc_hdr.ioc_len > BIT(30)) {
> +		CERROR("LIBCFS ioctl: ioc_len larger than 1<<30\n");
> +		return true;
> +	}
> +	if (data->ioc_inllen1 > BIT(30)) {
> +		CERROR("LIBCFS ioctl: ioc_inllen1 larger than 1<<30\n");
> +		return true;
> +	}
> +	if (data->ioc_inllen2 > BIT(30)) {
> +		CERROR("LIBCFS ioctl: ioc_inllen2 larger than 1<<30\n");
> +		return true;
> +	}
> +	if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
> +		CERROR("LIBCFS ioctl: inlbuf1 pointer but 0 length\n");
> +		return true;
> +	}
> +	if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
> +		CERROR("LIBCFS ioctl: inlbuf2 pointer but 0 length\n");
> +		return true;
> +	}
> +	if (data->ioc_pbuf1 && !data->ioc_plen1) {
> +		CERROR("LIBCFS ioctl: pbuf1 pointer but 0 length\n");
> +		return true;
> +	}
> +	if (data->ioc_pbuf2 && !data->ioc_plen2) {
> +		CERROR("LIBCFS ioctl: pbuf2 pointer but 0 length\n");
> +		return true;
> +	}
> +	if (data->ioc_plen1 && !data->ioc_pbuf1) {
> +		CERROR("LIBCFS ioctl: plen1 nonzero but no pbuf1 pointer\n");
> +		return true;
> +	}
> +	if (data->ioc_plen2 && !data->ioc_pbuf2) {
> +		CERROR("LIBCFS ioctl: plen2 nonzero but no pbuf2 pointer\n");
> +		return true;
> +	}
> +	if ((u32)libcfs_ioctl_packlen(data) != data->ioc_hdr.ioc_len) {
> +		CERROR("LIBCFS ioctl: packlen != ioc_len\n");
> +		return true;
> +	}
> +	if (data->ioc_inllen1 &&
> +	    data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') {
> +		CERROR("LIBCFS ioctl: inlbuf1 not 0 terminated\n");
> +		return true;
> +	}
> +	if (data->ioc_inllen2 &&
> +	    data->ioc_bulk[cfs_size_round(data->ioc_inllen1) +
> +			   data->ioc_inllen2 - 1] != '\0') {
> +		CERROR("LIBCFS ioctl: inlbuf2 not 0 terminated\n");
> +		return true;
> +	}
> +	return false;
> +}
> +
> +static int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data)
> +{
> +	if (libcfs_ioctl_is_invalid(data)) {
> +		CERROR("libcfs ioctl: parameter not correctly formatted\n");
> +		return -EINVAL;
> +	}
> +
> +	if (data->ioc_inllen1)
> +		data->ioc_inlbuf1 = &data->ioc_bulk[0];
> +
> +	if (data->ioc_inllen2)
> +		data->ioc_inlbuf2 = &data->ioc_bulk[0] +
> +			cfs_size_round(data->ioc_inllen1);
> +
> +	return 0;
> +}
> +
> +static int libcfs_ioctl_getdata(struct libcfs_ioctl_hdr **hdr_pp,
> +			 const struct libcfs_ioctl_hdr __user *uhdr)
> +{
> +	struct libcfs_ioctl_hdr hdr;
> +	int err;
> +
> +	if (copy_from_user(&hdr, uhdr, sizeof(hdr)))
> +		return -EFAULT;
> +
> +	if (hdr.ioc_version != LIBCFS_IOCTL_VERSION &&
> +	    hdr.ioc_version != LIBCFS_IOCTL_VERSION2) {
> +		CERROR("libcfs ioctl: version mismatch expected %#x, got %#x\n",
> +		       LIBCFS_IOCTL_VERSION, hdr.ioc_version);
> +		return -EINVAL;
> +	}
> +
> +	if (hdr.ioc_len < sizeof(hdr)) {
> +		CERROR("libcfs ioctl: user buffer too small for ioctl\n");
> +		return -EINVAL;
> +	}
> +
> +	if (hdr.ioc_len > LIBCFS_IOC_DATA_MAX) {
> +		CERROR("libcfs ioctl: user buffer is too large %d/%d\n",
> +		       hdr.ioc_len, LIBCFS_IOC_DATA_MAX);
> +		return -EINVAL;
> +	}
> +
> +	*hdr_pp = kvmalloc(hdr.ioc_len, GFP_KERNEL);
> +	if (!*hdr_pp)
> +		return -ENOMEM;
> +
> +	if (copy_from_user(*hdr_pp, uhdr, hdr.ioc_len)) {
> +		err = -EFAULT;
> +		goto free;
> +	}
> +
> +	if ((*hdr_pp)->ioc_version != hdr.ioc_version ||
> +	    (*hdr_pp)->ioc_len != hdr.ioc_len) {
> +		err = -EINVAL;
> +		goto free;
> +	}
> +
> +	return 0;
> +
> +free:
> +	kvfree(*hdr_pp);
> +	return err;
> +}
> +
>  static int libcfs_ioctl(unsigned long cmd, void __user *uparam)
>  {
>  	struct libcfs_ioctl_data *data = NULL;
> 
> 
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [lustre-devel] [PATCH 04/10] staging: lustre: lu_object: move retry logic inside htable_lookup
  2018-05-01  8:22   ` [lustre-devel] " Dilger, Andreas
@ 2018-05-02 18:21     ` James Simmons
  2018-05-04  0:30       ` NeilBrown
  2018-05-04  1:30     ` NeilBrown
  1 sibling, 1 reply; 28+ messages in thread
From: James Simmons @ 2018-05-02 18:21 UTC (permalink / raw)
  To: Dilger, Andreas
  Cc: NeilBrown, Drokin, Oleg, Greg Kroah-Hartman,
	Linux Kernel Mailing List, Lustre Development List


> On Apr 30, 2018, at 21:52, NeilBrown <neilb@suse.com> wrote:
> > 
> > The current retry logic, to wait when a 'dying' object is found,
> > spans multiple functions.  The process is attached to a waitqueue
> > and set TASK_UNINTERRUPTIBLE in htable_lookup, and this status
> > is passed back through lu_object_find_try() to lu_object_find_at()
> > where schedule() is called and the process is removed from the queue.
> > 
> > This can be simplified by moving all the logic (including
> > hashtable locking) inside htable_lookup(), which now never returns
> > EAGAIN.
> > 
> > Note that htable_lookup() is called with the hash bucket lock
> > held, and will drop and retake it if it needs to schedule.
> > 
> > I made this a 'goto' loop rather than a 'while(1)' loop as the
> > diff is easier to read.
> > 
> > Signed-off-by: NeilBrown <neilb@suse.com>
> > ---
> > drivers/staging/lustre/lustre/obdclass/lu_object.c |   73 +++++++-------------
> > 1 file changed, 27 insertions(+), 46 deletions(-)
> > 
> > diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
> > index 2bf089817157..93daa52e2535 100644
> > --- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
> > +++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
> > @@ -586,16 +586,21 @@ EXPORT_SYMBOL(lu_object_print);
> > static struct lu_object *htable_lookup(struct lu_site *s,
> 
> It's probably a good idea to add a comment for this function that it may
> drop and re-acquire the hash bucket lock internally.
> 
> > 				       struct cfs_hash_bd *bd,
> > 				       const struct lu_fid *f,
> > -				       wait_queue_entry_t *waiter,
> > 				       __u64 *version)
> > {
> > +	struct cfs_hash		*hs = s->ls_obj_hash;
> > 	struct lu_site_bkt_data *bkt;
> > 	struct lu_object_header *h;
> > 	struct hlist_node	*hnode;
> > -	__u64  ver = cfs_hash_bd_version_get(bd);
> > +	__u64 ver;
> > +	wait_queue_entry_t waiter;
> > 
> > -	if (*version == ver)
> > +retry:
> > +	ver = cfs_hash_bd_version_get(bd);
> > +
> > +	if (*version == ver) {
> > 		return ERR_PTR(-ENOENT);
> > +	}
> 
> (style) we don't need the {} around a single-line if statement

I hate to be that guy, but could you run checkpatch on your patches?
 
> > 	*version = ver;
> > 	bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, bd);
> > @@ -625,11 +630,15 @@ static struct lu_object *htable_lookup(struct lu_site *s,
> > 	 * drained), and moreover, lookup has to wait until object is freed.
> > 	 */
> > 
> > -	init_waitqueue_entry(waiter, current);
> > -	add_wait_queue(&bkt->lsb_marche_funebre, waiter);
> > +	init_waitqueue_entry(&waiter, current);
> > +	add_wait_queue(&bkt->lsb_marche_funebre, &waiter);
> > 	set_current_state(TASK_UNINTERRUPTIBLE);
> > 	lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_DEATH_RACE);
> > -	return ERR_PTR(-EAGAIN);
> > +	cfs_hash_bd_unlock(hs, bd, 1);
> 
> This looks like it isn't unlocking and locking the hash bucket in the same
> manner that it was done in the caller.  Here excl = 1, but in the caller
> you changed it to excl = 0?

This is very much like the work done by Lai. The difference is that Lai
removed the work queue handling completely in htable_lookup(). You can see
the details at https://jira.hpdd.intel.com/browse/LU-9049. I will push the
missing lu_object fixes including LU-9049 on top of your patch set so you
can see the approach Lai took. From there we can figure out how to merge the
lu_object work and fix the issues Andreas and I pointed out.

> > +	schedule();
> > +	remove_wait_queue(&bkt->lsb_marche_funebre, &waiter);
> 
> Is it worthwhile to use your new helper function here to get the wq from "s"?
> 
> > +	cfs_hash_bd_lock(hs, bd, 1);
> > +	goto retry;
> > }
> > 
> > /**
> > @@ -693,13 +702,14 @@ static struct lu_object *lu_object_new(const struct lu_env *env,
> > }
> > 
> > /**
> > - * Core logic of lu_object_find*() functions.
> > + * Much like lu_object_find(), but top level device of object is specifically
> > + * \a dev rather than top level device of the site. This interface allows
> > + * objects of different "stacking" to be created within the same site.
> >  */
> > -static struct lu_object *lu_object_find_try(const struct lu_env *env,
> > -					    struct lu_device *dev,
> > -					    const struct lu_fid *f,
> > -					    const struct lu_object_conf *conf,
> > -					    wait_queue_entry_t *waiter)
> > +struct lu_object *lu_object_find_at(const struct lu_env *env,
> > +				    struct lu_device *dev,
> > +				    const struct lu_fid *f,
> > +				    const struct lu_object_conf *conf)
> > {
> > 	struct lu_object      *o;
> > 	struct lu_object      *shadow;
> > @@ -725,17 +735,16 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
> > 	 * It is unnecessary to perform lookup-alloc-lookup-insert, instead,
> > 	 * just alloc and insert directly.
> > 	 *
> > -	 * If dying object is found during index search, add @waiter to the
> > -	 * site wait-queue and return ERR_PTR(-EAGAIN).
> > 	 */
> > 	if (conf && conf->loc_flags & LOC_F_NEW)
> > 		return lu_object_new(env, dev, f, conf);
> > 
> > 	s  = dev->ld_site;
> > 	hs = s->ls_obj_hash;
> > -	cfs_hash_bd_get_and_lock(hs, (void *)f, &bd, 1);
> > -	o = htable_lookup(s, &bd, f, waiter, &version);
> > -	cfs_hash_bd_unlock(hs, &bd, 1);
> > +	cfs_hash_bd_get_and_lock(hs, (void *)f, &bd, 0);
> > +	o = htable_lookup(s, &bd, f, &version);
> > +	cfs_hash_bd_unlock(hs, &bd, 0);
> 
> Here you changed the locking to a non-exclusive (read) lock instead of an
> exclusive (write) lock?  Why.

I have the same question.

> 
> > +
> > 	if (!IS_ERR(o) || PTR_ERR(o) != -ENOENT)
> > 		return o;
> > 
> > @@ -751,7 +760,7 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
> > 
> > 	cfs_hash_bd_lock(hs, &bd, 1);
> > 
> > -	shadow = htable_lookup(s, &bd, f, waiter, &version);
> > +	shadow = htable_lookup(s, &bd, f, &version);
> > 	if (likely(PTR_ERR(shadow) == -ENOENT)) {
> > 		cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
> > 		cfs_hash_bd_unlock(hs, &bd, 1);
> > @@ -766,34 +775,6 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
> > 	lu_object_free(env, o);
> > 	return shadow;
> > }
> > -
> > -/**
> > - * Much like lu_object_find(), but top level device of object is specifically
> > - * \a dev rather than top level device of the site. This interface allows
> > - * objects of different "stacking" to be created within the same site.
> > - */
> > -struct lu_object *lu_object_find_at(const struct lu_env *env,
> > -				    struct lu_device *dev,
> > -				    const struct lu_fid *f,
> > -				    const struct lu_object_conf *conf)
> > -{
> > -	wait_queue_head_t	*wq;
> > -	struct lu_object	*obj;
> > -	wait_queue_entry_t	   wait;
> > -
> > -	while (1) {
> > -		obj = lu_object_find_try(env, dev, f, conf, &wait);
> > -		if (obj != ERR_PTR(-EAGAIN))
> > -			return obj;
> > -		/*
> > -		 * lu_object_find_try() already added waiter into the
> > -		 * wait queue.
> > -		 */
> > -		schedule();
> > -		wq = lu_site_wq_from_fid(dev->ld_site, (void *)f);
> > -		remove_wait_queue(wq, &wait);
> > -	}
> > -}
> > EXPORT_SYMBOL(lu_object_find_at);
> > 
> > /**
> > 
> > 
> > _______________________________________________
> > lustre-devel mailing list
> > lustre-devel@lists.lustre.org
> > http://lists.lustre.org/listinfo.cgi/lustre-devel-lustre.org
> 
> Cheers, Andreas
> --
> Andreas Dilger
> Lustre Principal Architect
> Intel Corporation
> 
> 
> 
> 
> 
> 
> 
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 02/10] staging: lustre: make struct lu_site_bkt_data private
  2018-05-02  3:02   ` James Simmons
@ 2018-05-03 23:39     ` NeilBrown
  2018-05-07  1:42       ` Greg Kroah-Hartman
  0 siblings, 1 reply; 28+ messages in thread
From: NeilBrown @ 2018-05-03 23:39 UTC (permalink / raw)
  To: James Simmons
  Cc: Oleg Drokin, Greg Kroah-Hartman, Andreas Dilger,
	Linux Kernel Mailing List, Lustre Development List

[-- Attachment #1: Type: text/plain, Size: 7733 bytes --]

On Wed, May 02 2018, James Simmons wrote:

>> This data structure only needs to be public so that
>> various modules can access a wait queue to wait for object
>> destruction.
>> If we provide a function to get the wait queue, rather than the
>> whole bucket, the structure can be made private.
>> 
>> Signed-off-by: NeilBrown <neilb@suse.com>
>> ---
>>  drivers/staging/lustre/lustre/include/lu_object.h  |   36 +-------------
>>  drivers/staging/lustre/lustre/llite/lcommon_cl.c   |    8 ++-
>>  drivers/staging/lustre/lustre/lov/lov_object.c     |    8 ++-
>>  drivers/staging/lustre/lustre/obdclass/lu_object.c |   50 +++++++++++++++++---
>>  4 files changed, 54 insertions(+), 48 deletions(-)
>> 
>> diff --git a/drivers/staging/lustre/lustre/include/lu_object.h b/drivers/staging/lustre/lustre/include/lu_object.h
>> index c3b0ed518819..f29bbca5af65 100644
>> --- a/drivers/staging/lustre/lustre/include/lu_object.h
>> +++ b/drivers/staging/lustre/lustre/include/lu_object.h
>> @@ -549,31 +549,7 @@ struct lu_object_header {
>>  };
>>  
>>  struct fld;
>> -
>> -struct lu_site_bkt_data {
>> -	/**
>> -	 * number of object in this bucket on the lsb_lru list.
>> -	 */
>> -	long			lsb_lru_len;
>> -	/**
>> -	 * LRU list, updated on each access to object. Protected by
>> -	 * bucket lock of lu_site::ls_obj_hash.
>> -	 *
>> -	 * "Cold" end of LRU is lu_site::ls_lru.next. Accessed object are
>> -	 * moved to the lu_site::ls_lru.prev (this is due to the non-existence
>> -	 * of list_for_each_entry_safe_reverse()).
>> -	 */
>> -	struct list_head		lsb_lru;
>> -	/**
>> -	 * Wait-queue signaled when an object in this site is ultimately
>> -	 * destroyed (lu_object_free()). It is used by lu_object_find() to
>> -	 * wait before re-trying when object in the process of destruction is
>> -	 * found in the hash table.
>> -	 *
>> -	 * \see htable_lookup().
>> -	 */
>> -	wait_queue_head_t	       lsb_marche_funebre;
>> -};
>> +struct lu_site_bkt_data;
>>  
>>  enum {
>>  	LU_SS_CREATED	 = 0,
>> @@ -642,14 +618,8 @@ struct lu_site {
>>  	struct percpu_counter	 ls_lru_len_counter;
>>  };
>>  
>> -static inline struct lu_site_bkt_data *
>> -lu_site_bkt_from_fid(struct lu_site *site, struct lu_fid *fid)
>> -{
>> -	struct cfs_hash_bd bd;
>> -
>> -	cfs_hash_bd_get(site->ls_obj_hash, fid, &bd);
>> -	return cfs_hash_bd_extra_get(site->ls_obj_hash, &bd);
>> -}
>> +wait_queue_head_t *
>> +lu_site_wq_from_fid(struct lu_site *site, struct lu_fid *fid);
>>  
>>  static inline struct seq_server_site *lu_site2seq(const struct lu_site *s)
>>  {
>> diff --git a/drivers/staging/lustre/lustre/llite/lcommon_cl.c b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
>> index df5c0c0ae703..d5b42fb1d601 100644
>> --- a/drivers/staging/lustre/lustre/llite/lcommon_cl.c
>> +++ b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
>> @@ -211,12 +211,12 @@ static void cl_object_put_last(struct lu_env *env, struct cl_object *obj)
>>  
>>  	if (unlikely(atomic_read(&header->loh_ref) != 1)) {
>>  		struct lu_site *site = obj->co_lu.lo_dev->ld_site;
>> -		struct lu_site_bkt_data *bkt;
>> +		wait_queue_head_t *wq;
>>  
>> -		bkt = lu_site_bkt_from_fid(site, &header->loh_fid);
>> +		wq = lu_site_wq_from_fid(site, &header->loh_fid);
>>  
>>  		init_waitqueue_entry(&waiter, current);
>> -		add_wait_queue(&bkt->lsb_marche_funebre, &waiter);
>> +		add_wait_queue(wq, &waiter);
>>  
>>  		while (1) {
>>  			set_current_state(TASK_UNINTERRUPTIBLE);
>> @@ -226,7 +226,7 @@ static void cl_object_put_last(struct lu_env *env, struct cl_object *obj)
>>  		}
>>  
>>  		set_current_state(TASK_RUNNING);
>> -		remove_wait_queue(&bkt->lsb_marche_funebre, &waiter);
>> +		remove_wait_queue(wq, &waiter);
>>  	}
>>  
>>  	cl_object_put(env, obj);
>> diff --git a/drivers/staging/lustre/lustre/lov/lov_object.c b/drivers/staging/lustre/lustre/lov/lov_object.c
>> index f7c69680cb7d..adc90f310fd7 100644
>> --- a/drivers/staging/lustre/lustre/lov/lov_object.c
>> +++ b/drivers/staging/lustre/lustre/lov/lov_object.c
>> @@ -370,7 +370,7 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
>>  	struct cl_object	*sub;
>>  	struct lov_layout_raid0 *r0;
>>  	struct lu_site	  *site;
>> -	struct lu_site_bkt_data *bkt;
>> +	wait_queue_head_t *wq;
>>  	wait_queue_entry_t	  *waiter;
>>  
>>  	r0  = &lov->u.raid0;
>> @@ -378,7 +378,7 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
>>  
>>  	sub  = lovsub2cl(los);
>>  	site = sub->co_lu.lo_dev->ld_site;
>> -	bkt  = lu_site_bkt_from_fid(site, &sub->co_lu.lo_header->loh_fid);
>> +	wq   = lu_site_wq_from_fid(site, &sub->co_lu.lo_header->loh_fid);
>>  
>>  	cl_object_kill(env, sub);
>>  	/* release a reference to the sub-object and ... */
>> @@ -391,7 +391,7 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
>>  	if (r0->lo_sub[idx] == los) {
>>  		waiter = &lov_env_info(env)->lti_waiter;
>>  		init_waitqueue_entry(waiter, current);
>> -		add_wait_queue(&bkt->lsb_marche_funebre, waiter);
>> +		add_wait_queue(wq, waiter);
>>  		set_current_state(TASK_UNINTERRUPTIBLE);
>>  		while (1) {
>>  			/* this wait-queue is signaled at the end of
>> @@ -408,7 +408,7 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
>>  				break;
>>  			}
>>  		}
>> -		remove_wait_queue(&bkt->lsb_marche_funebre, waiter);
>> +		remove_wait_queue(wq, waiter);
>>  	}
>>  	LASSERT(!r0->lo_sub[idx]);
>>  }
>> diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
>> index 3de7dc0497c4..2a8a25d6edb5 100644
>> --- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
>> +++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
>> @@ -56,6 +56,31 @@
>>  #include <lu_ref.h>
>>  #include <linux/list.h>
>>  
>> +struct lu_site_bkt_data {
>> +	/**
>> +	 * number of object in this bucket on the lsb_lru list.
>> +	 */
>> +	long			lsb_lru_len;
>> +	/**
>> +	 * LRU list, updated on each access to object. Protected by
>> +	 * bucket lock of lu_site::ls_obj_hash.
>> +	 *
>> +	 * "Cold" end of LRU is lu_site::ls_lru.next. Accessed object are
>> +	 * moved to the lu_site::ls_lru.prev (this is due to the non-existence
>> +	 * of list_for_each_entry_safe_reverse()).
>> +	 */
>> +	struct list_head		lsb_lru;
>> +	/**
>> +	 * Wait-queue signaled when an object in this site is ultimately
>> +	 * destroyed (lu_object_free()). It is used by lu_object_find() to
>> +	 * wait before re-trying when object in the process of destruction is
>> +	 * found in the hash table.
>> +	 *
>> +	 * \see htable_lookup().
>> +	 */
>> +	wait_queue_head_t	       lsb_marche_funebre;
>> +};
>> +
>>  enum {
>>  	LU_CACHE_PERCENT_MAX	 = 50,
>>  	LU_CACHE_PERCENT_DEFAULT = 20
>> @@ -88,6 +113,17 @@ MODULE_PARM_DESC(lu_cache_nr, "Maximum number of objects in lu_object cache");
>>  static void lu_object_free(const struct lu_env *env, struct lu_object *o);
>>  static __u32 ls_stats_read(struct lprocfs_stats *stats, int idx);
>>  
>> +wait_queue_head_t *
>> +lu_site_wq_from_fid(struct lu_site *site, struct lu_fid *fid)
>> +{
>> +	struct cfs_hash_bd bd;
>> +	struct lu_site_bkt_data *bkt;
>> +
>> +	cfs_hash_bd_get(site->ls_obj_hash, fid, &bd);
>> +	bkt = cfs_hash_bd_extra_get(site->ls_obj_hash, &bd);
>> +	return &bkt->lsb_marche_funebre;
>> +}
>> +
>
> Nak. You forgot the EXPORT_SYMBOL for this. Once I added the export symbol
> it seems to work so far in my testing.

Thanks!  I'll try to remember the occasional build-test with modules.

Thanks,
NeilBrown

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 03/10] staging: lustre: lu_object: discard extra lru count.
  2018-05-01  4:19   ` Dilger, Andreas
@ 2018-05-04  0:08     ` NeilBrown
  0 siblings, 0 replies; 28+ messages in thread
From: NeilBrown @ 2018-05-04  0:08 UTC (permalink / raw)
  To: Dilger, Andreas
  Cc: Drokin, Oleg, Greg Kroah-Hartman, James Simmons,
	Linux Kernel Mailing List, Lustre Development List

[-- Attachment #1: Type: text/plain, Size: 5366 bytes --]

On Tue, May 01 2018, Dilger, Andreas wrote:

> On Apr 30, 2018, at 21:52, NeilBrown <neilb@suse.com> wrote:
>> 
>> lu_object maintains 2 lru counts.
>> One is a per-bucket lsb_lru_len.
>> The other is the per-cpu ls_lru_len_counter.
>> 
>> The only times the per-bucket counters are used are:
>> - a debug message when an object is added
>> - in lu_site_stats_get when all the counters are combined.
>> 
>> The debug message is not essential, and the per-cpu counter
>> can be used to get the combined total.
>> 
>> So discard the per-bucket lsb_lru_len.
>> 
>> Signed-off-by: NeilBrown <neilb@suse.com>
>
> Looks reasonable, though it would also be possible to fix the percpu
> functions rather than adding a workaround in this code.

It would be possible to change percpu_counter_read_positive() to
take a const (and we should), but thanks to you highlighting this
I realized that I should be using percpu_counter_sum_positive(),
to read across all per-cpu counters.  That takes a spinlock
so it would be a much harder sell to get it to accept a
const pointer.

Thanks,
NeilBrown


>
> Reviewed-by: Andreas Dilger <andreas.dilger@dilger.ca>
>
>> ---
>> drivers/staging/lustre/lustre/obdclass/lu_object.c |   24 ++++++++------------
>> 1 file changed, 9 insertions(+), 15 deletions(-)
>> 
>> diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
>> index 2a8a25d6edb5..2bf089817157 100644
>> --- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
>> +++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
>> @@ -57,10 +57,6 @@
>> #include <linux/list.h>
>> 
>> struct lu_site_bkt_data {
>> -	/**
>> -	 * number of object in this bucket on the lsb_lru list.
>> -	 */
>> -	long			lsb_lru_len;
>> 	/**
>> 	 * LRU list, updated on each access to object. Protected by
>> 	 * bucket lock of lu_site::ls_obj_hash.
>> @@ -187,10 +183,9 @@ void lu_object_put(const struct lu_env *env, struct lu_object *o)
>> 	if (!lu_object_is_dying(top)) {
>> 		LASSERT(list_empty(&top->loh_lru));
>> 		list_add_tail(&top->loh_lru, &bkt->lsb_lru);
>> -		bkt->lsb_lru_len++;
>> 		percpu_counter_inc(&site->ls_lru_len_counter);
>> -		CDEBUG(D_INODE, "Add %p to site lru. hash: %p, bkt: %p, lru_len: %ld\n",
>> -		       o, site->ls_obj_hash, bkt, bkt->lsb_lru_len);
>> +		CDEBUG(D_INODE, "Add %p to site lru. hash: %p, bkt: %p\n",
>> +		       o, site->ls_obj_hash, bkt);
>> 		cfs_hash_bd_unlock(site->ls_obj_hash, &bd, 1);
>> 		return;
>> 	}
>> @@ -238,7 +233,6 @@ void lu_object_unhash(const struct lu_env *env, struct lu_object *o)
>> 
>> 			list_del_init(&top->loh_lru);
>> 			bkt = cfs_hash_bd_extra_get(obj_hash, &bd);
>> -			bkt->lsb_lru_len--;
>> 			percpu_counter_dec(&site->ls_lru_len_counter);
>> 		}
>> 		cfs_hash_bd_del_locked(obj_hash, &bd, &top->loh_hash);
>> @@ -422,7 +416,6 @@ int lu_site_purge_objects(const struct lu_env *env, struct lu_site *s,
>> 			cfs_hash_bd_del_locked(s->ls_obj_hash,
>> 					       &bd2, &h->loh_hash);
>> 			list_move(&h->loh_lru, &dispose);
>> -			bkt->lsb_lru_len--;
>> 			percpu_counter_dec(&s->ls_lru_len_counter);
>> 			if (did_sth == 0)
>> 				did_sth = 1;
>> @@ -621,7 +614,6 @@ static struct lu_object *htable_lookup(struct lu_site *s,
>> 		lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_HIT);
>> 		if (!list_empty(&h->loh_lru)) {
>> 			list_del_init(&h->loh_lru);
>> -			bkt->lsb_lru_len--;
>> 			percpu_counter_dec(&s->ls_lru_len_counter);
>> 		}
>> 		return lu_object_top(h);
>> @@ -1834,19 +1826,21 @@ struct lu_site_stats {
>> 	unsigned int	lss_busy;
>> };
>> 
>> -static void lu_site_stats_get(struct cfs_hash *hs,
>> +static void lu_site_stats_get(const struct lu_site *s,
>> 			      struct lu_site_stats *stats, int populated)
>> {
>> +	struct cfs_hash *hs = s->ls_obj_hash;
>> 	struct cfs_hash_bd bd;
>> 	unsigned int i;
>> +	/* percpu_counter_read_positive() won't accept a const pointer */
>> +	struct lu_site *s2 = (struct lu_site *)s;
>
> It would seem worthwhile to change the percpu_counter_read_positive() and
> percpu_counter_read() arguments to be "const struct percpu_counter *fbc",
> rather than doing this cast here.  I can't see any reason that would be bad,
> since both implementations just access fbc->count, and do not modify anything.
>
>> +	stats->lss_busy += cfs_hash_size_get(hs) -
>> +		percpu_counter_read_positive(&s2->ls_lru_len_counter);
>> 	cfs_hash_for_each_bucket(hs, &bd, i) {
>> -		struct lu_site_bkt_data *bkt = cfs_hash_bd_extra_get(hs, &bd);
>> 		struct hlist_head	*hhead;
>> 
>> 		cfs_hash_bd_lock(hs, &bd, 1);
>> -		stats->lss_busy  +=
>> -			cfs_hash_bd_count_get(&bd) - bkt->lsb_lru_len;
>> 		stats->lss_total += cfs_hash_bd_count_get(&bd);
>> 		stats->lss_max_search = max((int)stats->lss_max_search,
>> 					    cfs_hash_bd_depmax_get(&bd));
>> @@ -2039,7 +2033,7 @@ int lu_site_stats_print(const struct lu_site *s, struct seq_file *m)
>> 	struct lu_site_stats stats;
>> 
>> 	memset(&stats, 0, sizeof(stats));
>> -	lu_site_stats_get(s->ls_obj_hash, &stats, 1);
>> +	lu_site_stats_get(s, &stats, 1);
>> 
>> 	seq_printf(m, "%d/%d %d/%ld %d %d %d %d %d %d %d\n",
>> 		   stats.lss_busy,
>> 
>> 
>
> Cheers, Andreas
> --
> Andreas Dilger
> Lustre Principal Architect
> Intel Corporation

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [lustre-devel] [PATCH 04/10] staging: lustre: lu_object: move retry logic inside htable_lookup
  2018-05-02 18:21     ` James Simmons
@ 2018-05-04  0:30       ` NeilBrown
  0 siblings, 0 replies; 28+ messages in thread
From: NeilBrown @ 2018-05-04  0:30 UTC (permalink / raw)
  To: James Simmons, Dilger, Andreas
  Cc: Drokin, Oleg, Greg Kroah-Hartman, Linux Kernel Mailing List,
	Lustre Development List

[-- Attachment #1: Type: text/plain, Size: 4306 bytes --]

On Wed, May 02 2018, James Simmons wrote:

>> On Apr 30, 2018, at 21:52, NeilBrown <neilb@suse.com> wrote:
>> > 
>> > The current retry logic, to wait when a 'dying' object is found,
>> > spans multiple functions.  The process is attached to a waitqueue
>> > and set TASK_UNINTERRUPTIBLE in htable_lookup, and this status
>> > is passed back through lu_object_find_try() to lu_object_find_at()
>> > where schedule() is called and the process is removed from the queue.
>> > 
>> > This can be simplified by moving all the logic (including
>> > hashtable locking) inside htable_lookup(), which now never returns
>> > EAGAIN.
>> > 
>> > Note that htable_lookup() is called with the hash bucket lock
>> > held, and will drop and retake it if it needs to schedule.
>> > 
>> > I made this a 'goto' loop rather than a 'while(1)' loop as the
>> > diff is easier to read.
>> > 
>> > Signed-off-by: NeilBrown <neilb@suse.com>
>> > ---
>> > drivers/staging/lustre/lustre/obdclass/lu_object.c |   73 +++++++-------------
>> > 1 file changed, 27 insertions(+), 46 deletions(-)
>> > 
>> > diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
>> > index 2bf089817157..93daa52e2535 100644
>> > --- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
>> > +++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
>> > @@ -586,16 +586,21 @@ EXPORT_SYMBOL(lu_object_print);
>> > static struct lu_object *htable_lookup(struct lu_site *s,
>> 
>> It's probably a good idea to add a comment for this function that it may
>> drop and re-acquire the hash bucket lock internally.
>> 
>> > 				       struct cfs_hash_bd *bd,
>> > 				       const struct lu_fid *f,
>> > -				       wait_queue_entry_t *waiter,
>> > 				       __u64 *version)
>> > {
>> > +	struct cfs_hash		*hs = s->ls_obj_hash;
>> > 	struct lu_site_bkt_data *bkt;
>> > 	struct lu_object_header *h;
>> > 	struct hlist_node	*hnode;
>> > -	__u64  ver = cfs_hash_bd_version_get(bd);
>> > +	__u64 ver;
>> > +	wait_queue_entry_t waiter;
>> > 
>> > -	if (*version == ver)
>> > +retry:
>> > +	ver = cfs_hash_bd_version_get(bd);
>> > +
>> > +	if (*version == ver) {
>> > 		return ERR_PTR(-ENOENT);
>> > +	}
>> 
>> (style) we don't need the {} around a single-line if statement
>
> I hate to be that guy but could you run checkpatch on your patches.
>  

Someone's got to be "that guy" - thanks.
I have (at last) modified my patch-preparation script to run checkpatch
and show me all the errors that I'm about to post.


>> > 	*version = ver;
>> > 	bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, bd);
>> > @@ -625,11 +630,15 @@ static struct lu_object *htable_lookup(struct lu_site *s,
>> > 	 * drained), and moreover, lookup has to wait until object is freed.
>> > 	 */
>> > 
>> > -	init_waitqueue_entry(waiter, current);
>> > -	add_wait_queue(&bkt->lsb_marche_funebre, waiter);
>> > +	init_waitqueue_entry(&waiter, current);
>> > +	add_wait_queue(&bkt->lsb_marche_funebre, &waiter);
>> > 	set_current_state(TASK_UNINTERRUPTIBLE);
>> > 	lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_DEATH_RACE);
>> > -	return ERR_PTR(-EAGAIN);
>> > +	cfs_hash_bd_unlock(hs, bd, 1);
>> 
>> This looks like it isn't unlocking and locking the hash bucket in the same
>> manner that it was done in the caller.  Here excl = 1, but in the caller
>> you changed it to excl = 0?
>
> This is very much like the work done by Lai. The difference is Lai removed 
> the work queue handling completely in htable_lookup(). You can see the 
> details at https://jira.hpdd.intel.com/browse/LU-9049. I will push the 
> missing lu_object fixes including LU-9049 on top of your patch set so you
> can see the approach Lai did. From there we can figure out how to merge the 
> lu_object work and fix the issues Andreas and I pointed out.

I think I did see that before but didn't feel I understood it enough to
do anything with, so I deferred it.  Having the patches that you
provided, I think it is starting to make more sense.  Once I resubmit
this current series I'll have a closer look.  Probably we can just
apply the series you sent on top of mine - I might even combine the two
- and then think about whatever else needs doing.

Thanks,
NeilBrown


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 10/10] staging: lustre: fix error deref in ll_splice_alias().
  2018-05-02  3:05   ` James Simmons
@ 2018-05-04  0:34     ` NeilBrown
  0 siblings, 0 replies; 28+ messages in thread
From: NeilBrown @ 2018-05-04  0:34 UTC (permalink / raw)
  To: James Simmons
  Cc: Oleg Drokin, Greg Kroah-Hartman, Andreas Dilger,
	Linux Kernel Mailing List, Lustre Development List

[-- Attachment #1: Type: text/plain, Size: 1741 bytes --]

On Wed, May 02 2018, James Simmons wrote:

>> d_splice_alias() can return an ERR_PTR().
>> If it does while debugging is enabled, the following
>> CDEBUG() will dereference that error and crash.
>> 
>> So add appropriate checking, and provide a separate
>> debug message for the error case.
>
> Yeah!!! It fixed the issues. Thank you.

:-)
So I've made it "Reported-and-tested-by: James"

Thanks,
NeilBrown


>
> Reviewed-by: James Simmons <jsimmons@infradead.org>
>  
>> Reported-by: James Simmons <jsimmons@infradead.org>
>> Fixes: e9d4f0b9f559 ("staging: lustre: llite: use d_splice_alias for directories.")
>> Signed-off-by: NeilBrown <neilb@suse.com>
>> ---
>>  drivers/staging/lustre/lustre/llite/namei.c |    8 ++++++--
>>  1 file changed, 6 insertions(+), 2 deletions(-)
>> 
>> diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c
>> index 6c9ec462eb41..24a6873d86a2 100644
>> --- a/drivers/staging/lustre/lustre/llite/namei.c
>> +++ b/drivers/staging/lustre/lustre/llite/namei.c
>> @@ -442,11 +442,15 @@ struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de)
>>  	} else {
>>  		struct dentry *new = d_splice_alias(inode, de);
>>  
>> +		if (IS_ERR(new))
>> +			CDEBUG(D_DENTRY, "splice inode %p as %pd gives error %lu\n",
>> +			       inode, de, PTR_ERR(new));
>>  		if (new)
>>  			de = new;
>>  	}
>> -	CDEBUG(D_DENTRY, "Add dentry %p inode %p refc %d flags %#x\n",
>> -	       de, d_inode(de), d_count(de), de->d_flags);
>> +	if (!IS_ERR(de))
>> +		CDEBUG(D_DENTRY, "Add dentry %p inode %p refc %d flags %#x\n",
>> +		       de, d_inode(de), d_count(de), de->d_flags);
>>  	return de;
>>  }
>>  
>> 
>> 
>> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [lustre-devel] [PATCH 04/10] staging: lustre: lu_object: move retry logic inside htable_lookup
  2018-05-01  8:22   ` [lustre-devel] " Dilger, Andreas
  2018-05-02 18:21     ` James Simmons
@ 2018-05-04  1:30     ` NeilBrown
  1 sibling, 0 replies; 28+ messages in thread
From: NeilBrown @ 2018-05-04  1:30 UTC (permalink / raw)
  To: Dilger, Andreas
  Cc: Drokin, Oleg, Greg Kroah-Hartman, James Simmons,
	Linux Kernel Mailing List, Lustre Development List

[-- Attachment #1: Type: text/plain, Size: 5658 bytes --]

On Tue, May 01 2018, Dilger, Andreas wrote:

> On Apr 30, 2018, at 21:52, NeilBrown <neilb@suse.com> wrote:
>> 
>> The current retry logic, to wait when a 'dying' object is found,
>> spans multiple functions.  The process is attached to a waitqueue
>> and set TASK_UNINTERRUPTIBLE in htable_lookup, and this status
>> is passed back through lu_object_find_try() to lu_object_find_at()
>> where schedule() is called and the process is removed from the queue.
>> 
>> This can be simplified by moving all the logic (including
>> hashtable locking) inside htable_lookup(), which now never returns
>> EAGAIN.
>> 
>> Note that htable_lookup() is called with the hash bucket lock
>> held, and will drop and retake it if it needs to schedule.
>> 
>> I made this a 'goto' loop rather than a 'while(1)' loop as the
>> diff is easier to read.
>> 
>> Signed-off-by: NeilBrown <neilb@suse.com>
>> ---
>> drivers/staging/lustre/lustre/obdclass/lu_object.c |   73 +++++++-------------
>> 1 file changed, 27 insertions(+), 46 deletions(-)
>> 
>> diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
>> index 2bf089817157..93daa52e2535 100644
>> --- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
>> +++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
>> @@ -586,16 +586,21 @@ EXPORT_SYMBOL(lu_object_print);
>> static struct lu_object *htable_lookup(struct lu_site *s,
>
> It's probably a good idea to add a comment for this function that it may
> drop and re-acquire the hash bucket lock internally.

Added - thanks.

>
>> 				       struct cfs_hash_bd *bd,
>> 				       const struct lu_fid *f,
>> -				       wait_queue_entry_t *waiter,
>> 				       __u64 *version)
>> {
>> +	struct cfs_hash		*hs = s->ls_obj_hash;
>> 	struct lu_site_bkt_data *bkt;
>> 	struct lu_object_header *h;
>> 	struct hlist_node	*hnode;
>> -	__u64  ver = cfs_hash_bd_version_get(bd);
>> +	__u64 ver;
>> +	wait_queue_entry_t waiter;
>> 
>> -	if (*version == ver)
>> +retry:
>> +	ver = cfs_hash_bd_version_get(bd);
>> +
>> +	if (*version == ver) {
>> 		return ERR_PTR(-ENOENT);
>> +	}
>
> (style) we don't need the {} around a single-line if statement

Fixed.

>
>> 	*version = ver;
>> 	bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, bd);
>> @@ -625,11 +630,15 @@ static struct lu_object *htable_lookup(struct lu_site *s,
>> 	 * drained), and moreover, lookup has to wait until object is freed.
>> 	 */
>> 
>> -	init_waitqueue_entry(waiter, current);
>> -	add_wait_queue(&bkt->lsb_marche_funebre, waiter);
>> +	init_waitqueue_entry(&waiter, current);
>> +	add_wait_queue(&bkt->lsb_marche_funebre, &waiter);
>> 	set_current_state(TASK_UNINTERRUPTIBLE);
>> 	lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_DEATH_RACE);
>> -	return ERR_PTR(-EAGAIN);
>> +	cfs_hash_bd_unlock(hs, bd, 1);
>
> This looks like it isn't unlocking and locking the hash bucket in the same
> manner that it was done in the caller.  Here excl = 1, but in the caller
> you changed it to excl = 0?

Don't know what happened there... though as the table is created with
CFS_HASH_SPIN_BLKLOCK it doesn't make any behavioral difference.
I've put it back to use '1' uniformly.

>
>> +	schedule();
>> +	remove_wait_queue(&bkt->lsb_marche_funebre, &waiter);
>
> Is it worthwhile to use your new helper function here to get the wq from "s"?

I don't think so.  We already have the 'bkt' and it seems pointless to
compute the hash a second time and use it to find the bucket and then
the queue, just to use a nice wrapper function.


>
>> +	cfs_hash_bd_lock(hs, bd, 1);
>> +	goto retry;
>> }
>> 
>> /**
>> @@ -693,13 +702,14 @@ static struct lu_object *lu_object_new(const struct lu_env *env,
>> }
>> 
>> /**
>> - * Core logic of lu_object_find*() functions.
>> + * Much like lu_object_find(), but top level device of object is specifically
>> + * \a dev rather than top level device of the site. This interface allows
>> + * objects of different "stacking" to be created within the same site.
>>  */
>> -static struct lu_object *lu_object_find_try(const struct lu_env *env,
>> -					    struct lu_device *dev,
>> -					    const struct lu_fid *f,
>> -					    const struct lu_object_conf *conf,
>> -					    wait_queue_entry_t *waiter)
>> +struct lu_object *lu_object_find_at(const struct lu_env *env,
>> +				    struct lu_device *dev,
>> +				    const struct lu_fid *f,
>> +				    const struct lu_object_conf *conf)
>> {
>> 	struct lu_object      *o;
>> 	struct lu_object      *shadow;
>> @@ -725,17 +735,16 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
>> 	 * It is unnecessary to perform lookup-alloc-lookup-insert, instead,
>> 	 * just alloc and insert directly.
>> 	 *
>> -	 * If dying object is found during index search, add @waiter to the
>> -	 * site wait-queue and return ERR_PTR(-EAGAIN).
>> 	 */
>> 	if (conf && conf->loc_flags & LOC_F_NEW)
>> 		return lu_object_new(env, dev, f, conf);
>> 
>> 	s  = dev->ld_site;
>> 	hs = s->ls_obj_hash;
>> -	cfs_hash_bd_get_and_lock(hs, (void *)f, &bd, 1);
>> -	o = htable_lookup(s, &bd, f, waiter, &version);
>> -	cfs_hash_bd_unlock(hs, &bd, 1);
>> +	cfs_hash_bd_get_and_lock(hs, (void *)f, &bd, 0);
>> +	o = htable_lookup(s, &bd, f, &version);
>> +	cfs_hash_bd_unlock(hs, &bd, 0);
>
> Here you changed the locking to a non-exclusive (read) lock instead of an
> exclusive (write) lock?  Why.

Carelessness is all.  And the fact that my thinking was focused on
rhashtable which doesn't make that distinction. Fixed.

Thanks,
NeilBrown

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 02/10] staging: lustre: make struct lu_site_bkt_data private
  2018-05-03 23:39     ` NeilBrown
@ 2018-05-07  1:42       ` Greg Kroah-Hartman
  0 siblings, 0 replies; 28+ messages in thread
From: Greg Kroah-Hartman @ 2018-05-07  1:42 UTC (permalink / raw)
  To: NeilBrown
  Cc: James Simmons, Oleg Drokin, Andreas Dilger,
	Linux Kernel Mailing List, Lustre Development List

On Fri, May 04, 2018 at 09:39:04AM +1000, NeilBrown wrote:
> On Wed, May 02 2018, James Simmons wrote:
> 
> >> This data structure only needs to be public so that
> >> various modules can access a wait queue to wait for object
> >> destruction.
> >> If we provide a function to get the wait queue, rather than the
> >> whole bucket, the structure can be made private.
> >> 
> >> Signed-off-by: NeilBrown <neilb@suse.com>
> >> ---
> >>  drivers/staging/lustre/lustre/include/lu_object.h  |   36 +-------------
> >>  drivers/staging/lustre/lustre/llite/lcommon_cl.c   |    8 ++-
> >>  drivers/staging/lustre/lustre/lov/lov_object.c     |    8 ++-
> >>  drivers/staging/lustre/lustre/obdclass/lu_object.c |   50 +++++++++++++++++---
> >>  4 files changed, 54 insertions(+), 48 deletions(-)
> >> 
> >> diff --git a/drivers/staging/lustre/lustre/include/lu_object.h b/drivers/staging/lustre/lustre/include/lu_object.h
> >> index c3b0ed518819..f29bbca5af65 100644
> >> --- a/drivers/staging/lustre/lustre/include/lu_object.h
> >> +++ b/drivers/staging/lustre/lustre/include/lu_object.h
> >> @@ -549,31 +549,7 @@ struct lu_object_header {
> >>  };
> >>  
> >>  struct fld;
> >> -
> >> -struct lu_site_bkt_data {
> >> -	/**
> >> -	 * number of object in this bucket on the lsb_lru list.
> >> -	 */
> >> -	long			lsb_lru_len;
> >> -	/**
> >> -	 * LRU list, updated on each access to object. Protected by
> >> -	 * bucket lock of lu_site::ls_obj_hash.
> >> -	 *
> >> -	 * "Cold" end of LRU is lu_site::ls_lru.next. Accessed object are
> >> -	 * moved to the lu_site::ls_lru.prev (this is due to the non-existence
> >> -	 * of list_for_each_entry_safe_reverse()).
> >> -	 */
> >> -	struct list_head		lsb_lru;
> >> -	/**
> >> -	 * Wait-queue signaled when an object in this site is ultimately
> >> -	 * destroyed (lu_object_free()). It is used by lu_object_find() to
> >> -	 * wait before re-trying when object in the process of destruction is
> >> -	 * found in the hash table.
> >> -	 *
> >> -	 * \see htable_lookup().
> >> -	 */
> >> -	wait_queue_head_t	       lsb_marche_funebre;
> >> -};
> >> +struct lu_site_bkt_data;
> >>  
> >>  enum {
> >>  	LU_SS_CREATED	 = 0,
> >> @@ -642,14 +618,8 @@ struct lu_site {
> >>  	struct percpu_counter	 ls_lru_len_counter;
> >>  };
> >>  
> >> -static inline struct lu_site_bkt_data *
> >> -lu_site_bkt_from_fid(struct lu_site *site, struct lu_fid *fid)
> >> -{
> >> -	struct cfs_hash_bd bd;
> >> -
> >> -	cfs_hash_bd_get(site->ls_obj_hash, fid, &bd);
> >> -	return cfs_hash_bd_extra_get(site->ls_obj_hash, &bd);
> >> -}
> >> +wait_queue_head_t *
> >> +lu_site_wq_from_fid(struct lu_site *site, struct lu_fid *fid);
> >>  
> >>  static inline struct seq_server_site *lu_site2seq(const struct lu_site *s)
> >>  {
> >> diff --git a/drivers/staging/lustre/lustre/llite/lcommon_cl.c b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
> >> index df5c0c0ae703..d5b42fb1d601 100644
> >> --- a/drivers/staging/lustre/lustre/llite/lcommon_cl.c
> >> +++ b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
> >> @@ -211,12 +211,12 @@ static void cl_object_put_last(struct lu_env *env, struct cl_object *obj)
> >>  
> >>  	if (unlikely(atomic_read(&header->loh_ref) != 1)) {
> >>  		struct lu_site *site = obj->co_lu.lo_dev->ld_site;
> >> -		struct lu_site_bkt_data *bkt;
> >> +		wait_queue_head_t *wq;
> >>  
> >> -		bkt = lu_site_bkt_from_fid(site, &header->loh_fid);
> >> +		wq = lu_site_wq_from_fid(site, &header->loh_fid);
> >>  
> >>  		init_waitqueue_entry(&waiter, current);
> >> -		add_wait_queue(&bkt->lsb_marche_funebre, &waiter);
> >> +		add_wait_queue(wq, &waiter);
> >>  
> >>  		while (1) {
> >>  			set_current_state(TASK_UNINTERRUPTIBLE);
> >> @@ -226,7 +226,7 @@ static void cl_object_put_last(struct lu_env *env, struct cl_object *obj)
> >>  		}
> >>  
> >>  		set_current_state(TASK_RUNNING);
> >> -		remove_wait_queue(&bkt->lsb_marche_funebre, &waiter);
> >> +		remove_wait_queue(wq, &waiter);
> >>  	}
> >>  
> >>  	cl_object_put(env, obj);
> >> diff --git a/drivers/staging/lustre/lustre/lov/lov_object.c b/drivers/staging/lustre/lustre/lov/lov_object.c
> >> index f7c69680cb7d..adc90f310fd7 100644
> >> --- a/drivers/staging/lustre/lustre/lov/lov_object.c
> >> +++ b/drivers/staging/lustre/lustre/lov/lov_object.c
> >> @@ -370,7 +370,7 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
> >>  	struct cl_object	*sub;
> >>  	struct lov_layout_raid0 *r0;
> >>  	struct lu_site	  *site;
> >> -	struct lu_site_bkt_data *bkt;
> >> +	wait_queue_head_t *wq;
> >>  	wait_queue_entry_t	  *waiter;
> >>  
> >>  	r0  = &lov->u.raid0;
> >> @@ -378,7 +378,7 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
> >>  
> >>  	sub  = lovsub2cl(los);
> >>  	site = sub->co_lu.lo_dev->ld_site;
> >> -	bkt  = lu_site_bkt_from_fid(site, &sub->co_lu.lo_header->loh_fid);
> >> +	wq   = lu_site_wq_from_fid(site, &sub->co_lu.lo_header->loh_fid);
> >>  
> >>  	cl_object_kill(env, sub);
> >>  	/* release a reference to the sub-object and ... */
> >> @@ -391,7 +391,7 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
> >>  	if (r0->lo_sub[idx] == los) {
> >>  		waiter = &lov_env_info(env)->lti_waiter;
> >>  		init_waitqueue_entry(waiter, current);
> >> -		add_wait_queue(&bkt->lsb_marche_funebre, waiter);
> >> +		add_wait_queue(wq, waiter);
> >>  		set_current_state(TASK_UNINTERRUPTIBLE);
> >>  		while (1) {
> >>  			/* this wait-queue is signaled at the end of
> >> @@ -408,7 +408,7 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
> >>  				break;
> >>  			}
> >>  		}
> >> -		remove_wait_queue(&bkt->lsb_marche_funebre, waiter);
> >> +		remove_wait_queue(wq, waiter);
> >>  	}
> >>  	LASSERT(!r0->lo_sub[idx]);
> >>  }
> >> diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
> >> index 3de7dc0497c4..2a8a25d6edb5 100644
> >> --- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
> >> +++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
> >> @@ -56,6 +56,31 @@
> >>  #include <lu_ref.h>
> >>  #include <linux/list.h>
> >>  
> >> +struct lu_site_bkt_data {
> >> +	/**
> >> +	 * number of object in this bucket on the lsb_lru list.
> >> +	 */
> >> +	long			lsb_lru_len;
> >> +	/**
> >> +	 * LRU list, updated on each access to object. Protected by
> >> +	 * bucket lock of lu_site::ls_obj_hash.
> >> +	 *
> >> +	 * "Cold" end of LRU is lu_site::ls_lru.next. Accessed object are
> >> +	 * moved to the lu_site::ls_lru.prev (this is due to the non-existence
> >> +	 * of list_for_each_entry_safe_reverse()).
> >> +	 */
> >> +	struct list_head		lsb_lru;
> >> +	/**
> >> +	 * Wait-queue signaled when an object in this site is ultimately
> >> +	 * destroyed (lu_object_free()). It is used by lu_object_find() to
> >> +	 * wait before re-trying when object in the process of destruction is
> >> +	 * found in the hash table.
> >> +	 *
> >> +	 * \see htable_lookup().
> >> +	 */
> >> +	wait_queue_head_t	       lsb_marche_funebre;
> >> +};
> >> +
> >>  enum {
> >>  	LU_CACHE_PERCENT_MAX	 = 50,
> >>  	LU_CACHE_PERCENT_DEFAULT = 20
> >> @@ -88,6 +113,17 @@ MODULE_PARM_DESC(lu_cache_nr, "Maximum number of objects in lu_object cache");
> >>  static void lu_object_free(const struct lu_env *env, struct lu_object *o);
> >>  static __u32 ls_stats_read(struct lprocfs_stats *stats, int idx);
> >>  
> >> +wait_queue_head_t *
> >> +lu_site_wq_from_fid(struct lu_site *site, struct lu_fid *fid)
> >> +{
> >> +	struct cfs_hash_bd bd;
> >> +	struct lu_site_bkt_data *bkt;
> >> +
> >> +	cfs_hash_bd_get(site->ls_obj_hash, fid, &bd);
> >> +	bkt = cfs_hash_bd_extra_get(site->ls_obj_hash, &bd);
> >> +	return &bkt->lsb_marche_funebre;
> >> +}
> >> +
> >
> > Nak. You forgot the EXPORT_SYMBOL for this. Once I added the export symbol
> > it seems to work so far in my testing.
> 
> Thanks!  I'll try to remember the occasional build-test with modules.

'make allmodconfig' is your friend :)

I'm dropping this series, I'm guessing you will send a v2 for the whole
set.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 04/10] staging: lustre: lu_object: move retry logic inside htable_lookup
  2018-05-07  0:54 [PATCH 00/10 - v2] staging: lustre: assorted improvements NeilBrown
@ 2018-05-07  0:54 ` NeilBrown
  0 siblings, 0 replies; 28+ messages in thread
From: NeilBrown @ 2018-05-07  0:54 UTC (permalink / raw)
  To: Oleg Drokin, Greg Kroah-Hartman, James Simmons, Andreas Dilger
  Cc: Linux Kernel Mailing List, Lustre Development List

The current retry logic, to wait when a 'dying' object is found,
spans multiple functions.  The process is attached to a waitqueue
and set to TASK_UNINTERRUPTIBLE in htable_lookup, and this status
is passed back through lu_object_find_try() to lu_object_find_at()
where schedule() is called and the process is removed from the queue.

This can be simplified by moving all the logic (including
hashtable locking) inside htable_lookup(), which now never returns
-EAGAIN.

Note that htable_lookup() is called with the hash bucket lock
held, and will drop and retake it if it needs to schedule.

I made this a 'goto' loop rather than a 'while(1)' loop as the
diff is easier to read.

Signed-off-by: NeilBrown <neilb@suse.com>
---
 drivers/staging/lustre/lustre/obdclass/lu_object.c |   70 ++++++++------------
 1 file changed, 27 insertions(+), 43 deletions(-)

diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index bf5b309edeae..0c9419a6d5d6 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -584,16 +584,24 @@ void lu_object_print(const struct lu_env *env, void *cookie,
 }
 EXPORT_SYMBOL(lu_object_print);
 
+/*
+ * NOTE: htable_lookup() is called with the relevant
+ * hash bucket locked, but might drop and re-acquire the lock.
+ */
 static struct lu_object *htable_lookup(struct lu_site *s,
 				       struct cfs_hash_bd *bd,
 				       const struct lu_fid *f,
-				       wait_queue_entry_t *waiter,
 				       __u64 *version)
 {
+	struct cfs_hash		*hs = s->ls_obj_hash;
 	struct lu_site_bkt_data *bkt;
 	struct lu_object_header *h;
 	struct hlist_node	*hnode;
-	__u64  ver = cfs_hash_bd_version_get(bd);
+	__u64 ver;
+	wait_queue_entry_t waiter;
+
+retry:
+	ver = cfs_hash_bd_version_get(bd);
 
 	if (*version == ver)
 		return ERR_PTR(-ENOENT);
@@ -626,11 +634,15 @@ static struct lu_object *htable_lookup(struct lu_site *s,
 	 * drained), and moreover, lookup has to wait until object is freed.
 	 */
 
-	init_waitqueue_entry(waiter, current);
-	add_wait_queue(&bkt->lsb_marche_funebre, waiter);
+	init_waitqueue_entry(&waiter, current);
+	add_wait_queue(&bkt->lsb_marche_funebre, &waiter);
 	set_current_state(TASK_UNINTERRUPTIBLE);
 	lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_DEATH_RACE);
-	return ERR_PTR(-EAGAIN);
+	cfs_hash_bd_unlock(hs, bd, 1);
+	schedule();
+	remove_wait_queue(&bkt->lsb_marche_funebre, &waiter);
+	cfs_hash_bd_lock(hs, bd, 1);
+	goto retry;
 }
 
 /**
@@ -694,13 +706,14 @@ static struct lu_object *lu_object_new(const struct lu_env *env,
 }
 
 /**
- * Core logic of lu_object_find*() functions.
+ * Much like lu_object_find(), but top level device of object is specifically
+ * \a dev rather than top level device of the site. This interface allows
+ * objects of different "stacking" to be created within the same site.
  */
-static struct lu_object *lu_object_find_try(const struct lu_env *env,
-					    struct lu_device *dev,
-					    const struct lu_fid *f,
-					    const struct lu_object_conf *conf,
-					    wait_queue_entry_t *waiter)
+struct lu_object *lu_object_find_at(const struct lu_env *env,
+				    struct lu_device *dev,
+				    const struct lu_fid *f,
+				    const struct lu_object_conf *conf)
 {
 	struct lu_object      *o;
 	struct lu_object      *shadow;
@@ -726,8 +739,6 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
 	 * It is unnecessary to perform lookup-alloc-lookup-insert, instead,
 	 * just alloc and insert directly.
 	 *
-	 * If dying object is found during index search, add @waiter to the
-	 * site wait-queue and return ERR_PTR(-EAGAIN).
 	 */
 	if (conf && conf->loc_flags & LOC_F_NEW)
 		return lu_object_new(env, dev, f, conf);
@@ -735,8 +746,9 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
 	s  = dev->ld_site;
 	hs = s->ls_obj_hash;
 	cfs_hash_bd_get_and_lock(hs, (void *)f, &bd, 1);
-	o = htable_lookup(s, &bd, f, waiter, &version);
+	o = htable_lookup(s, &bd, f, &version);
 	cfs_hash_bd_unlock(hs, &bd, 1);
+
 	if (!IS_ERR(o) || PTR_ERR(o) != -ENOENT)
 		return o;
 
@@ -752,7 +764,7 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
 
 	cfs_hash_bd_lock(hs, &bd, 1);
 
-	shadow = htable_lookup(s, &bd, f, waiter, &version);
+	shadow = htable_lookup(s, &bd, f, &version);
 	if (likely(PTR_ERR(shadow) == -ENOENT)) {
 		cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
 		cfs_hash_bd_unlock(hs, &bd, 1);
@@ -767,34 +779,6 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
 	lu_object_free(env, o);
 	return shadow;
 }
-
-/**
- * Much like lu_object_find(), but top level device of object is specifically
- * \a dev rather than top level device of the site. This interface allows
- * objects of different "stacking" to be created within the same site.
- */
-struct lu_object *lu_object_find_at(const struct lu_env *env,
-				    struct lu_device *dev,
-				    const struct lu_fid *f,
-				    const struct lu_object_conf *conf)
-{
-	wait_queue_head_t	*wq;
-	struct lu_object	*obj;
-	wait_queue_entry_t	   wait;
-
-	while (1) {
-		obj = lu_object_find_try(env, dev, f, conf, &wait);
-		if (obj != ERR_PTR(-EAGAIN))
-			return obj;
-		/*
-		 * lu_object_find_try() already added waiter into the
-		 * wait queue.
-		 */
-		schedule();
-		wq = lu_site_wq_from_fid(dev->ld_site, (void *)f);
-		remove_wait_queue(wq, &wait);
-	}
-}
 EXPORT_SYMBOL(lu_object_find_at);
 
 /**

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2018-05-07  1:42 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-05-01  3:52 [PATCH 00/10] staging: lustre: assorted improvements NeilBrown
2018-05-01  3:52 ` [PATCH 02/10] staging: lustre: make struct lu_site_bkt_data private NeilBrown
2018-05-01  4:10   ` [lustre-devel] " Dilger, Andreas
2018-05-02  3:02   ` James Simmons
2018-05-03 23:39     ` NeilBrown
2018-05-07  1:42       ` Greg Kroah-Hartman
2018-05-01  3:52 ` [PATCH 01/10] staging: lustre: ldlm: store name directly in namespace NeilBrown
2018-05-01  4:04   ` Dilger, Andreas
2018-05-02 18:11   ` James Simmons
2018-05-01  3:52 ` [PATCH 03/10] staging: lustre: lu_object: discard extra lru count NeilBrown
2018-05-01  4:19   ` Dilger, Andreas
2018-05-04  0:08     ` NeilBrown
2018-05-01  3:52 ` [PATCH 07/10] staging: lustre: llite: remove redundant lookup in dump_pgcache NeilBrown
2018-05-01  3:52 ` [PATCH 08/10] staging: lustre: move misc-device registration closer to related code NeilBrown
2018-05-02 18:12   ` James Simmons
2018-05-01  3:52 ` [PATCH 04/10] staging: lustre: lu_object: move retry logic inside htable_lookup NeilBrown
2018-05-01  8:22   ` [lustre-devel] " Dilger, Andreas
2018-05-02 18:21     ` James Simmons
2018-05-04  0:30       ` NeilBrown
2018-05-04  1:30     ` NeilBrown
2018-05-01  3:52 ` [PATCH 05/10] staging: lustre: fold lu_object_new() into lu_object_find_at() NeilBrown
2018-05-01  3:52 ` [PATCH 10/10] staging: lustre: fix error deref in ll_splice_alias() NeilBrown
2018-05-02  3:05   ` James Simmons
2018-05-04  0:34     ` NeilBrown
2018-05-01  3:52 ` [PATCH 06/10] staging: lustre: llite: use more private data in dump_pgcache NeilBrown
2018-05-01  3:52 ` [PATCH 09/10] staging: lustre: move remaining code from linux-module.c to module.c NeilBrown
2018-05-02 18:13   ` James Simmons
2018-05-07  0:54 [PATCH 00/10 - v2] staging: lustre: assorted improvements NeilBrown
2018-05-07  0:54 ` [PATCH 04/10] staging: lustre: lu_object: move retry logic inside htable_lookup NeilBrown

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).