LKML Archive on lore.kernel.org
* [PATCH V2 0/7] loop: cleanup charging io to mem/blkcg
@ 2021-07-28 8:16 Ming Lei
2021-07-28 8:16 ` [PATCH V2 1/7] mm: memcontrol: add helper of memcg_get_e_css Ming Lei
` (6 more replies)
0 siblings, 7 replies; 9+ messages in thread
From: Ming Lei @ 2021-07-28 8:16 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-kernel, linux-block, Christoph Hellwig, Dan Schatzberg, Ming Lei
Hello Guys,
Clean up charging io to mem/blkcg a bit:
- avoid storing blkcg_css/memcg_css in loop_cmd; store blkcg_css in
loop_worker instead
- avoid acquiring ->lo_work_lock in the IO path
- simplify the blkcg_css query via xarray
- other misc cleanups
V2:
- add helper of memcg_get_e_css
- clean up #ifdef usage
- improve the last patch, as discussed with Dan Schatzberg
Ming Lei (7):
mm: memcontrol: add helper of memcg_get_e_css
loop: clean up blkcg association
loop: convert timer for monitoring idle worker into dwork
loop: add __loop_free_idle_workers() for covering freeing workers in
clearing FD
loop: improve loop_process_work
loop: use xarray to store workers
loop: don't add worker into idle list
drivers/block/loop.c | 320 ++++++++++++++++++++-----------------
drivers/block/loop.h | 7 +-
include/linux/memcontrol.h | 8 +
3 files changed, 188 insertions(+), 147 deletions(-)
--
2.31.1
* [PATCH V2 1/7] mm: memcontrol: add helper of memcg_get_e_css
2021-07-28 8:16 [PATCH V2 0/7] loop: cleanup charging io to mem/blkcg Ming Lei
@ 2021-07-28 8:16 ` Ming Lei
2021-07-28 8:16 ` [PATCH V2 2/7] loop: clean up blkcg association Ming Lei
` (5 subsequent siblings)
6 siblings, 0 replies; 9+ messages in thread
From: Ming Lei @ 2021-07-28 8:16 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-kernel, linux-block, Christoph Hellwig, Dan Schatzberg,
Ming Lei, Andrew Morton
Add a memcg_get_e_css() helper so that consumers needn't call
cgroup_get_e_css(cgroup, &memory_cgrp_subsys) directly: the
&memory_cgrp_subsys symbol only exists when CONFIG_MEMCG is enabled,
and the helper hides that detail from callers.
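As a rough sketch of intended usage (a hypothetical caller, not part of
this patch; only the helper itself is added here), a consumer holding a
blkcg css could do:
    struct cgroup_subsys_state *memcg_css;
    struct mem_cgroup *old_memcg;

    /* resolves to NULL automatically when !CONFIG_MEMCG */
    memcg_css = memcg_get_e_css(blkcg_css->cgroup);
    if (memcg_css) {
        old_memcg = set_active_memcg(mem_cgroup_from_css(memcg_css));
        /* ... allocations here are charged to that memcg ... */
        set_active_memcg(old_memcg);
        css_put(memcg_css);    /* drop the reference the helper took */
    }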
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
include/linux/memcontrol.h | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index bfe5c486f4ad..67c35793fdce 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1101,6 +1101,10 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
gfp_t gfp_mask,
unsigned long *total_scanned);
+static inline struct cgroup_subsys_state *memcg_get_e_css(struct cgroup *cgrp)
+{
+ return cgroup_get_e_css(cgrp, &memory_cgrp_subsys);
+}
#else /* CONFIG_MEMCG */
#define MEM_CGROUP_ID_SHIFT 0
@@ -1456,6 +1460,10 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
{
return 0;
}
+static inline struct cgroup_subsys_state *memcg_get_e_css(struct cgroup *cgrp)
+{
+ return NULL;
+}
#endif /* CONFIG_MEMCG */
static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx)
--
2.31.1
* [PATCH V2 2/7] loop: clean up blkcg association
2021-07-28 8:16 [PATCH V2 0/7] loop: cleanup charging io to mem/blkcg Ming Lei
2021-07-28 8:16 ` [PATCH V2 1/7] mm: memcontrol: add helper of memcg_get_e_css Ming Lei
@ 2021-07-28 8:16 ` Ming Lei
2021-07-28 8:16 ` [PATCH V2 3/7] loop: convert timer for monitoring idle worker into dwork Ming Lei
` (4 subsequent siblings)
6 siblings, 0 replies; 9+ messages in thread
From: Ming Lei @ 2021-07-28 8:16 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-kernel, linux-block, Christoph Hellwig, Dan Schatzberg, Ming Lei
Each loop_worker is responsible for running requests originating from
the same blkcg, so:
1) associate the worker kthread with the blkcg when entering the work
function and disassociate when leaving it, so the association isn't
done for every single request.
2) remove ->blkcg_css and ->memcg_css from 'loop_cmd' since both are
now per loop_worker.
Also kill the #ifdefs in the related functions.
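Condensed, the resulting shape of the work function looks like this
(a sketch distilled from the hunks below; the memcg handling is omitted
for brevity):
    static void loop_workfn(struct work_struct *work)
    {
        struct loop_worker *worker =
            container_of(work, struct loop_worker, work);

        /* associate once for the whole batch of commands ... */
        kthread_associate_blkcg(worker->blkcg_css);
        loop_process_work(worker, &worker->cmd_list, worker->lo);
        /* ... and disassociate once when the batch is done */
        kthread_associate_blkcg(NULL);
    }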
Acked-by: Dan Schatzberg <schatzberg.dan@gmail.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
drivers/block/loop.c | 81 +++++++++++++++++++++++---------------------
drivers/block/loop.h | 2 --
2 files changed, 43 insertions(+), 40 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f37b9e3d833c..1756c17d3936 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -936,23 +936,46 @@ static inline int queue_on_root_worker(struct cgroup_subsys_state *css)
{
return !css || css == blkcg_root_css;
}
+static struct cgroup_subsys_state *loop_rq_blkcg_css(struct loop_cmd *cmd)
+{
+ struct request *rq = blk_mq_rq_from_pdu(cmd);
+ /* always use the first bio's css */
+ struct blkcg *blkcg = bio_blkcg(rq->bio);
+
+ if (blkcg)
+ return &blkcg->css;
+ return NULL;
+}
#else
static inline int queue_on_root_worker(struct cgroup_subsys_state *css)
{
return !css;
}
+static struct cgroup_subsys_state *loop_rq_blkcg_css(struct loop_cmd *cmd)
+{
+ return NULL;
+}
#endif
+static struct cgroup_subsys_state *loop_rq_get_memcg_css(
+ struct cgroup_subsys_state *blkcg_css)
+{
+ if (blkcg_css)
+ return memcg_get_e_css(blkcg_css->cgroup);
+ return NULL;
+}
+
static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
{
struct rb_node **node = &(lo->worker_tree.rb_node), *parent = NULL;
struct loop_worker *cur_worker, *worker = NULL;
struct work_struct *work;
struct list_head *cmd_list;
+ struct cgroup_subsys_state *blkcg_css = loop_rq_blkcg_css(cmd);
spin_lock_irq(&lo->lo_work_lock);
- if (queue_on_root_worker(cmd->blkcg_css))
+ if (queue_on_root_worker(blkcg_css))
goto queue_work;
node = &lo->worker_tree.rb_node;
@@ -960,10 +983,10 @@ static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
while (*node) {
parent = *node;
cur_worker = container_of(*node, struct loop_worker, rb_node);
- if (cur_worker->blkcg_css == cmd->blkcg_css) {
+ if (cur_worker->blkcg_css == blkcg_css) {
worker = cur_worker;
break;
- } else if ((long)cur_worker->blkcg_css < (long)cmd->blkcg_css) {
+ } else if ((long)cur_worker->blkcg_css < (long)blkcg_css) {
node = &(*node)->rb_left;
} else {
node = &(*node)->rb_right;
@@ -977,15 +1000,10 @@ static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
* In the event we cannot allocate a worker, just queue on the
* rootcg worker and issue the I/O as the rootcg
*/
- if (!worker) {
- cmd->blkcg_css = NULL;
- if (cmd->memcg_css)
- css_put(cmd->memcg_css);
- cmd->memcg_css = NULL;
+ if (!worker)
goto queue_work;
- }
- worker->blkcg_css = cmd->blkcg_css;
+ worker->blkcg_css = blkcg_css;
css_get(worker->blkcg_css);
INIT_WORK(&worker->work, loop_workfn);
INIT_LIST_HEAD(&worker->cmd_list);
@@ -2098,19 +2116,6 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
break;
}
- /* always use the first bio's css */
- cmd->blkcg_css = NULL;
- cmd->memcg_css = NULL;
-#ifdef CONFIG_BLK_CGROUP
- if (rq->bio && rq->bio->bi_blkg) {
- cmd->blkcg_css = &bio_blkcg(rq->bio)->css;
-#ifdef CONFIG_MEMCG
- cmd->memcg_css =
- cgroup_get_e_css(cmd->blkcg_css->cgroup,
- &memory_cgrp_subsys);
-#endif
- }
-#endif
loop_queue_work(lo, cmd);
return BLK_STS_OK;
@@ -2122,28 +2127,14 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
const bool write = op_is_write(req_op(rq));
struct loop_device *lo = rq->q->queuedata;
int ret = 0;
- struct mem_cgroup *old_memcg = NULL;
if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) {
ret = -EIO;
goto failed;
}
- if (cmd->blkcg_css)
- kthread_associate_blkcg(cmd->blkcg_css);
- if (cmd->memcg_css)
- old_memcg = set_active_memcg(
- mem_cgroup_from_css(cmd->memcg_css));
-
ret = do_req_filebacked(lo, rq);
- if (cmd->blkcg_css)
- kthread_associate_blkcg(NULL);
-
- if (cmd->memcg_css) {
- set_active_memcg(old_memcg);
- css_put(cmd->memcg_css);
- }
failed:
/* complete non-aio request */
if (!cmd->use_aio || ret) {
@@ -2199,7 +2190,21 @@ static void loop_workfn(struct work_struct *work)
{
struct loop_worker *worker =
container_of(work, struct loop_worker, work);
- loop_process_work(worker, &worker->cmd_list, worker->lo);
+ struct mem_cgroup *old_memcg = NULL;
+ struct cgroup_subsys_state *memcg_css = NULL;
+
+ kthread_associate_blkcg(worker->blkcg_css);
+ memcg_css = loop_rq_get_memcg_css(worker->blkcg_css);
+ if (memcg_css) {
+ old_memcg = set_active_memcg(
+ mem_cgroup_from_css(memcg_css));
+ loop_process_work(worker, &worker->cmd_list, worker->lo);
+ set_active_memcg(old_memcg);
+ css_put(memcg_css);
+ } else {
+ loop_process_work(worker, &worker->cmd_list, worker->lo);
+ }
+ kthread_associate_blkcg(NULL);
}
static void loop_rootcg_workfn(struct work_struct *work)
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 1988899db63a..a52a3fd89457 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -77,8 +77,6 @@ struct loop_cmd {
long ret;
struct kiocb iocb;
struct bio_vec *bvec;
- struct cgroup_subsys_state *blkcg_css;
- struct cgroup_subsys_state *memcg_css;
};
/* Support for loadable transfer modules */
--
2.31.1
* [PATCH V2 3/7] loop: convert timer for monitoring idle worker into dwork
2021-07-28 8:16 [PATCH V2 0/7] loop: cleanup charging io to mem/blkcg Ming Lei
2021-07-28 8:16 ` [PATCH V2 1/7] mm: memcontrol: add helper of memcg_get_e_css Ming Lei
2021-07-28 8:16 ` [PATCH V2 2/7] loop: clean up blkcg association Ming Lei
@ 2021-07-28 8:16 ` Ming Lei
2021-07-28 8:16 ` [PATCH V2 4/7] loop: add __loop_free_idle_workers() for covering freeing workers in clearing FD Ming Lei
` (3 subsequent siblings)
6 siblings, 0 replies; 9+ messages in thread
From: Ming Lei @ 2021-07-28 8:16 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-kernel, linux-block, Christoph Hellwig, Dan Schatzberg, Ming Lei
It's not necessary to use a timer for this; delayed work is just fine,
and then we no longer need to disable interrupts when acquiring
->lo_work_lock.
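The locking relaxation follows from the execution context: the timer
callback fires in softirq context, which forced every acquirer of
->lo_work_lock to disable interrupts, while a (delayed) work item runs
in process context where a plain spin_lock() suffices. The conversion,
in brief (a sketch distilled from the hunks below):
    /* before: timer callback fires in softirq context */
    timer_setup(&lo->timer, loop_free_idle_workers, TIMER_DEFERRABLE);
    timer_reduce(&lo->timer, jiffies + LOOP_IDLE_WORKER_TIMEOUT);

    /* after: delayed work runs in process context */
    INIT_DELAYED_WORK(&lo->idle_work, loop_free_idle_workers);
    schedule_delayed_work(&lo->idle_work, LOOP_IDLE_WORKER_TIMEOUT);
    /* ... and on teardown: */
    cancel_delayed_work_sync(&lo->idle_work);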
Acked-by: Dan Schatzberg <schatzberg.dan@gmail.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
drivers/block/loop.c | 75 ++++++++++++++++++++++----------------------
drivers/block/loop.h | 2 +-
2 files changed, 38 insertions(+), 39 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 1756c17d3936..f63bd75e80ba 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -929,7 +929,6 @@ struct loop_worker {
static void loop_workfn(struct work_struct *work);
static void loop_rootcg_workfn(struct work_struct *work);
-static void loop_free_idle_workers(struct timer_list *timer);
#ifdef CONFIG_BLK_CGROUP
static inline int queue_on_root_worker(struct cgroup_subsys_state *css)
@@ -973,7 +972,7 @@ static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
struct list_head *cmd_list;
struct cgroup_subsys_state *blkcg_css = loop_rq_blkcg_css(cmd);
- spin_lock_irq(&lo->lo_work_lock);
+ spin_lock(&lo->lo_work_lock);
if (queue_on_root_worker(blkcg_css))
goto queue_work;
@@ -1028,7 +1027,7 @@ static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
}
list_add_tail(&cmd->list_entry, cmd_list);
queue_work(lo->workqueue, work);
- spin_unlock_irq(&lo->lo_work_lock);
+ spin_unlock(&lo->lo_work_lock);
}
static void loop_update_rotational(struct loop_device *lo)
@@ -1150,6 +1149,33 @@ loop_set_status_from_info(struct loop_device *lo,
return 0;
}
+static void loop_set_timer(struct loop_device *lo)
+{
+ schedule_delayed_work(&lo->idle_work, LOOP_IDLE_WORKER_TIMEOUT);
+}
+
+static void loop_free_idle_workers(struct work_struct *work)
+{
+ struct loop_device *lo = container_of(work, struct loop_device,
+ idle_work.work);
+ struct loop_worker *pos, *worker;
+
+ spin_lock(&lo->lo_work_lock);
+ list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
+ idle_list) {
+ if (time_is_after_jiffies(worker->last_ran_at +
+ LOOP_IDLE_WORKER_TIMEOUT))
+ break;
+ list_del(&worker->idle_list);
+ rb_erase(&worker->rb_node, &lo->worker_tree);
+ css_put(worker->blkcg_css);
+ kfree(worker);
+ }
+ if (!list_empty(&lo->idle_worker_list))
+ loop_set_timer(lo);
+ spin_unlock(&lo->lo_work_lock);
+}
+
static int loop_configure(struct loop_device *lo, fmode_t mode,
struct block_device *bdev,
const struct loop_config *config)
@@ -1229,8 +1255,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
INIT_LIST_HEAD(&lo->rootcg_cmd_list);
INIT_LIST_HEAD(&lo->idle_worker_list);
lo->worker_tree = RB_ROOT;
- timer_setup(&lo->timer, loop_free_idle_workers,
- TIMER_DEFERRABLE);
+ INIT_DELAYED_WORK(&lo->idle_work, loop_free_idle_workers);
lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
lo->lo_device = bdev;
lo->lo_backing_file = file;
@@ -1320,7 +1345,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
blk_mq_freeze_queue(lo->lo_queue);
destroy_workqueue(lo->workqueue);
- spin_lock_irq(&lo->lo_work_lock);
+ spin_lock(&lo->lo_work_lock);
list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
idle_list) {
list_del(&worker->idle_list);
@@ -1328,8 +1353,8 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
css_put(worker->blkcg_css);
kfree(worker);
}
- spin_unlock_irq(&lo->lo_work_lock);
- del_timer_sync(&lo->timer);
+ spin_unlock(&lo->lo_work_lock);
+ cancel_delayed_work_sync(&lo->idle_work);
spin_lock_irq(&lo->lo_lock);
lo->lo_backing_file = NULL;
@@ -2147,11 +2172,6 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
}
}
-static void loop_set_timer(struct loop_device *lo)
-{
- timer_reduce(&lo->timer, jiffies + LOOP_IDLE_WORKER_TIMEOUT);
-}
-
static void loop_process_work(struct loop_worker *worker,
struct list_head *cmd_list, struct loop_device *lo)
{
@@ -2159,17 +2179,17 @@ static void loop_process_work(struct loop_worker *worker,
struct loop_cmd *cmd;
current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
- spin_lock_irq(&lo->lo_work_lock);
+ spin_lock(&lo->lo_work_lock);
while (!list_empty(cmd_list)) {
cmd = container_of(
cmd_list->next, struct loop_cmd, list_entry);
list_del(cmd_list->next);
- spin_unlock_irq(&lo->lo_work_lock);
+ spin_unlock(&lo->lo_work_lock);
loop_handle_cmd(cmd);
cond_resched();
- spin_lock_irq(&lo->lo_work_lock);
+ spin_lock(&lo->lo_work_lock);
}
/*
@@ -2182,7 +2202,7 @@ static void loop_process_work(struct loop_worker *worker,
list_add_tail(&worker->idle_list, &lo->idle_worker_list);
loop_set_timer(lo);
}
- spin_unlock_irq(&lo->lo_work_lock);
+ spin_unlock(&lo->lo_work_lock);
current->flags = orig_flags;
}
@@ -2214,27 +2234,6 @@ static void loop_rootcg_workfn(struct work_struct *work)
loop_process_work(NULL, &lo->rootcg_cmd_list, lo);
}
-static void loop_free_idle_workers(struct timer_list *timer)
-{
- struct loop_device *lo = container_of(timer, struct loop_device, timer);
- struct loop_worker *pos, *worker;
-
- spin_lock_irq(&lo->lo_work_lock);
- list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
- idle_list) {
- if (time_is_after_jiffies(worker->last_ran_at +
- LOOP_IDLE_WORKER_TIMEOUT))
- break;
- list_del(&worker->idle_list);
- rb_erase(&worker->rb_node, &lo->worker_tree);
- css_put(worker->blkcg_css);
- kfree(worker);
- }
- if (!list_empty(&lo->idle_worker_list))
- loop_set_timer(lo);
- spin_unlock_irq(&lo->lo_work_lock);
-}
-
static const struct blk_mq_ops loop_mq_ops = {
.queue_rq = loop_queue_rq,
.complete = lo_complete_rq,
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index a52a3fd89457..9df889af1bcf 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -60,7 +60,7 @@ struct loop_device {
struct list_head rootcg_cmd_list;
struct list_head idle_worker_list;
struct rb_root worker_tree;
- struct timer_list timer;
+ struct delayed_work idle_work;
bool use_dio;
bool sysfs_inited;
--
2.31.1
* [PATCH V2 4/7] loop: add __loop_free_idle_workers() for covering freeing workers in clearing FD
2021-07-28 8:16 [PATCH V2 0/7] loop: cleanup charging io to mem/blkcg Ming Lei
` (2 preceding siblings ...)
2021-07-28 8:16 ` [PATCH V2 3/7] loop: convert timer for monitoring idle worker into dwork Ming Lei
@ 2021-07-28 8:16 ` Ming Lei
2021-07-28 8:16 ` [PATCH V2 5/7] loop: improve loop_process_work Ming Lei
` (2 subsequent siblings)
6 siblings, 0 replies; 9+ messages in thread
From: Ming Lei @ 2021-07-28 8:16 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-kernel, linux-block, Christoph Hellwig, Dan Schatzberg, Ming Lei
Add the helper so we can remove the duplicated code for freeing all
workers when clearing the FD.
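The refactor, in outline (a sketch distilled from the hunks below): the
reclaim loop moves into a helper taking a 'force' flag, and the two
callers pick the behavior they need:
    /* idle-timeout path: only reclaim workers idle long enough */
    __loop_free_idle_workers(lo, false);

    /* __loop_clr_fd() path: tearing down, free everything */
    __loop_free_idle_workers(lo, true);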
Acked-by: Dan Schatzberg <schatzberg.dan@gmail.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
drivers/block/loop.c | 25 +++++++++++--------------
1 file changed, 11 insertions(+), 14 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f63bd75e80ba..4c524c72d976 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1154,16 +1154,14 @@ static void loop_set_timer(struct loop_device *lo)
schedule_delayed_work(&lo->idle_work, LOOP_IDLE_WORKER_TIMEOUT);
}
-static void loop_free_idle_workers(struct work_struct *work)
+static void __loop_free_idle_workers(struct loop_device *lo, bool force)
{
- struct loop_device *lo = container_of(work, struct loop_device,
- idle_work.work);
struct loop_worker *pos, *worker;
spin_lock(&lo->lo_work_lock);
list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
idle_list) {
- if (time_is_after_jiffies(worker->last_ran_at +
+ if (!force && time_is_after_jiffies(worker->last_ran_at +
LOOP_IDLE_WORKER_TIMEOUT))
break;
list_del(&worker->idle_list);
@@ -1176,6 +1174,14 @@ static void loop_free_idle_workers(struct work_struct *work)
spin_unlock(&lo->lo_work_lock);
}
+static void loop_free_idle_workers(struct work_struct *work)
+{
+ struct loop_device *lo = container_of(work, struct loop_device,
+ idle_work.work);
+
+ __loop_free_idle_workers(lo, false);
+}
+
static int loop_configure(struct loop_device *lo, fmode_t mode,
struct block_device *bdev,
const struct loop_config *config)
@@ -1324,7 +1330,6 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
int err = 0;
bool partscan = false;
int lo_number;
- struct loop_worker *pos, *worker;
mutex_lock(&lo->lo_mutex);
if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
@@ -1345,15 +1350,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
blk_mq_freeze_queue(lo->lo_queue);
destroy_workqueue(lo->workqueue);
- spin_lock(&lo->lo_work_lock);
- list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
- idle_list) {
- list_del(&worker->idle_list);
- rb_erase(&worker->rb_node, &lo->worker_tree);
- css_put(worker->blkcg_css);
- kfree(worker);
- }
- spin_unlock(&lo->lo_work_lock);
+ __loop_free_idle_workers(lo, true);
cancel_delayed_work_sync(&lo->idle_work);
spin_lock_irq(&lo->lo_lock);
--
2.31.1
* [PATCH V2 5/7] loop: improve loop_process_work
2021-07-28 8:16 [PATCH V2 0/7] loop: cleanup charging io to mem/blkcg Ming Lei
` (3 preceding siblings ...)
2021-07-28 8:16 ` [PATCH V2 4/7] loop: add __loop_free_idle_workers() for covering freeing workers in clearing FD Ming Lei
@ 2021-07-28 8:16 ` Ming Lei
2021-07-28 8:16 ` [PATCH V2 6/7] loop: use xarray to store workers Ming Lei
2021-07-28 8:16 ` [PATCH V2 7/7] loop: don't add worker into idle list Ming Lei
6 siblings, 0 replies; 9+ messages in thread
From: Ming Lei @ 2021-07-28 8:16 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-kernel, linux-block, Christoph Hellwig, Dan Schatzberg, Ming Lei
Avoid acquiring the spinlock for every loop command; instead, hold the
lock once to take all queued commands.
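The pattern, in isolation (a sketch distilled from the hunk below):
splice the whole list out under the lock, process it unlocked, and loop
back if more commands arrived in the meantime:
    LIST_HEAD(list);

    spin_lock(&lo->lo_work_lock);
again:
    list_splice_init(cmd_list, &list); /* take all pending cmds at once */
    spin_unlock(&lo->lo_work_lock);

    while (!list_empty(&list)) {       /* handle them without the lock */
        cmd = list_first_entry(&list, struct loop_cmd, list_entry);
        list_del_init(&cmd->list_entry);
        loop_handle_cmd(cmd);
    }

    spin_lock(&lo->lo_work_lock);
    if (!list_empty(cmd_list))         /* new arrivals while we were busy */
        goto again;
    spin_unlock(&lo->lo_work_lock);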
Acked-by: Dan Schatzberg <schatzberg.dan@gmail.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
drivers/block/loop.c | 21 +++++++++++++--------
1 file changed, 13 insertions(+), 8 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 4c524c72d976..74682c0f3f86 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -2174,21 +2174,26 @@ static void loop_process_work(struct loop_worker *worker,
{
int orig_flags = current->flags;
struct loop_cmd *cmd;
+ LIST_HEAD(list);
current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
+
spin_lock(&lo->lo_work_lock);
- while (!list_empty(cmd_list)) {
- cmd = container_of(
- cmd_list->next, struct loop_cmd, list_entry);
- list_del(cmd_list->next);
- spin_unlock(&lo->lo_work_lock);
+ again:
+ list_splice_init(cmd_list, &list);
+ spin_unlock(&lo->lo_work_lock);
- loop_handle_cmd(cmd);
- cond_resched();
+ while (!list_empty(&list)) {
+ cmd = list_first_entry(&list, struct loop_cmd, list_entry);
+ list_del_init(&cmd->list_entry);
- spin_lock(&lo->lo_work_lock);
+ loop_handle_cmd(cmd);
}
+ spin_lock(&lo->lo_work_lock);
+ if (!list_empty(cmd_list))
+ goto again;
+
/*
* We only add to the idle list if there are no pending cmds
* *and* the worker will not run again which ensures that it
--
2.31.1
* [PATCH V2 6/7] loop: use xarray to store workers
2021-07-28 8:16 [PATCH V2 0/7] loop: cleanup charging io to mem/blkcg Ming Lei
` (4 preceding siblings ...)
2021-07-28 8:16 ` [PATCH V2 5/7] loop: improve loop_process_work Ming Lei
@ 2021-07-28 8:16 ` Ming Lei
2021-07-28 8:16 ` [PATCH V2 7/7] loop: don't add worker into idle list Ming Lei
6 siblings, 0 replies; 9+ messages in thread
From: Ming Lei @ 2021-07-28 8:16 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-kernel, linux-block, Christoph Hellwig, Dan Schatzberg, Ming Lei
css->id is unique within each cgroup subsystem, so replace the rbtree
with an xarray that uses css->id as the key for querying/storing the
'blkcg_css'; this simplifies the code a lot.
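The lookup and insert then collapse to (a sketch distilled from the
hunks below):
    /* css->id is a small per-subsystem-unique integer: a natural key */
    worker = xa_load(&lo->workers, blkcg_css->id);
    if (!worker) {
        worker = kzalloc(sizeof(*worker), GFP_NOWAIT | __GFP_NOWARN);
        if (worker && xa_err(xa_store(&lo->workers, blkcg_css->id,
                                      worker, GFP_NOWAIT | __GFP_NOWARN))) {
            kfree(worker);
            worker = NULL;   /* fall back to the rootcg worker */
        }
    }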
Acked-by: Dan Schatzberg <schatzberg.dan@gmail.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
drivers/block/loop.c | 36 ++++++++++++++----------------------
drivers/block/loop.h | 3 ++-
2 files changed, 16 insertions(+), 23 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 74682c0f3f86..9b22e5469ed3 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -918,7 +918,6 @@ static void loop_config_discard(struct loop_device *lo)
}
struct loop_worker {
- struct rb_node rb_node;
struct work_struct work;
struct list_head cmd_list;
struct list_head idle_list;
@@ -966,35 +965,23 @@ static struct cgroup_subsys_state *loop_rq_get_memcg_css(
static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
{
- struct rb_node **node = &(lo->worker_tree.rb_node), *parent = NULL;
- struct loop_worker *cur_worker, *worker = NULL;
+ struct loop_worker *worker = NULL;
struct work_struct *work;
struct list_head *cmd_list;
struct cgroup_subsys_state *blkcg_css = loop_rq_blkcg_css(cmd);
+ gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN;
spin_lock(&lo->lo_work_lock);
if (queue_on_root_worker(blkcg_css))
goto queue_work;
- node = &lo->worker_tree.rb_node;
-
- while (*node) {
- parent = *node;
- cur_worker = container_of(*node, struct loop_worker, rb_node);
- if (cur_worker->blkcg_css == blkcg_css) {
- worker = cur_worker;
- break;
- } else if ((long)cur_worker->blkcg_css < (long)blkcg_css) {
- node = &(*node)->rb_left;
- } else {
- node = &(*node)->rb_right;
- }
- }
+ /* css->id is unique in each cgroup subsystem */
+ worker = xa_load(&lo->workers, blkcg_css->id);
if (worker)
goto queue_work;
- worker = kzalloc(sizeof(struct loop_worker), GFP_NOWAIT | __GFP_NOWARN);
+ worker = kzalloc(sizeof(*worker), gfp);
/*
* In the event we cannot allocate a worker, just queue on the
* rootcg worker and issue the I/O as the rootcg
@@ -1008,8 +995,12 @@ static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
INIT_LIST_HEAD(&worker->cmd_list);
INIT_LIST_HEAD(&worker->idle_list);
worker->lo = lo;
- rb_link_node(&worker->rb_node, parent, node);
- rb_insert_color(&worker->rb_node, &lo->worker_tree);
+
+ if (xa_err(xa_store(&lo->workers, blkcg_css->id, worker, gfp))) {
+ kfree(worker);
+ worker = NULL;
+ }
+
queue_work:
if (worker) {
/*
@@ -1165,7 +1156,7 @@ static void __loop_free_idle_workers(struct loop_device *lo, bool force)
LOOP_IDLE_WORKER_TIMEOUT))
break;
list_del(&worker->idle_list);
- rb_erase(&worker->rb_node, &lo->worker_tree);
+ xa_erase(&lo->workers, worker->blkcg_css->id);
css_put(worker->blkcg_css);
kfree(worker);
}
@@ -1260,7 +1251,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
INIT_WORK(&lo->rootcg_work, loop_rootcg_workfn);
INIT_LIST_HEAD(&lo->rootcg_cmd_list);
INIT_LIST_HEAD(&lo->idle_worker_list);
- lo->worker_tree = RB_ROOT;
+ xa_init(&lo->workers);
INIT_DELAYED_WORK(&lo->idle_work, loop_free_idle_workers);
lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
lo->lo_device = bdev;
@@ -1352,6 +1343,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
destroy_workqueue(lo->workqueue);
__loop_free_idle_workers(lo, true);
cancel_delayed_work_sync(&lo->idle_work);
+ xa_destroy(&lo->workers);
spin_lock_irq(&lo->lo_lock);
lo->lo_backing_file = NULL;
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 9df889af1bcf..cab34da1e1bb 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -14,6 +14,7 @@
#include <linux/blk-mq.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
+#include <linux/xarray.h>
#include <uapi/linux/loop.h>
/* Possible states of device */
@@ -59,7 +60,7 @@ struct loop_device {
struct work_struct rootcg_work;
struct list_head rootcg_cmd_list;
struct list_head idle_worker_list;
- struct rb_root worker_tree;
+ struct xarray workers;
struct delayed_work idle_work;
bool use_dio;
bool sysfs_inited;
--
2.31.1
* [PATCH V2 7/7] loop: don't add worker into idle list
2021-07-28 8:16 [PATCH V2 0/7] loop: cleanup charging io to mem/blkcg Ming Lei
` (5 preceding siblings ...)
2021-07-28 8:16 ` [PATCH V2 6/7] loop: use xarray to store workers Ming Lei
@ 2021-07-28 8:16 ` Ming Lei
2021-07-30 16:52 ` Dan Schatzberg
6 siblings, 1 reply; 9+ messages in thread
From: Ming Lei @ 2021-07-28 8:16 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-kernel, linux-block, Christoph Hellwig, Dan Schatzberg, Ming Lei
We can retrieve any worker via the xarray, so there is no need to add
workers to an idle list. Meanwhile, reduce ->lo_work_lock coverage: the
IO path no longer needs it except when adding a worker to or deleting
one from the xarray.
Also replace ->last_ran_at with ->reclaim_time, which is set when a
loop command is added to worker->cmd_list. A worker is reclaimed once
it has expired and has no pending commands.
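The heart of the change is the classic RCU lookup paired with
refcount_inc_not_zero() (a sketch distilled from the hunks below; the
worker is freed with kfree_rcu(), which keeps it valid for the duration
of the read-side critical section):
    rcu_read_lock();
    worker = xa_load(&lo->workers, blkcg_css->id);
    /* a refcount already at zero means the worker is being freed */
    if (worker && !refcount_inc_not_zero(&worker->refcnt))
        worker = NULL;
    rcu_read_unlock();

    if (!worker)   /* slow path: allocate (or race with) a new worker */
        worker = loop_alloc_or_get_worker(lo, blkcg_css);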
Cc: Dan Schatzberg <schatzberg.dan@gmail.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
drivers/block/loop.c | 172 ++++++++++++++++++++++++++-----------------
1 file changed, 104 insertions(+), 68 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 9b22e5469ed3..e4b292b2d8f4 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -920,10 +920,11 @@ static void loop_config_discard(struct loop_device *lo)
struct loop_worker {
struct work_struct work;
struct list_head cmd_list;
- struct list_head idle_list;
struct loop_device *lo;
struct cgroup_subsys_state *blkcg_css;
- unsigned long last_ran_at;
+ unsigned long reclaim_time;
+ spinlock_t lock;
+ refcount_t refcnt;
};
static void loop_workfn(struct work_struct *work);
@@ -963,62 +964,93 @@ static struct cgroup_subsys_state *loop_rq_get_memcg_css(
return NULL;
}
-static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
+static struct loop_worker *loop_alloc_or_get_worker(struct loop_device *lo,
+ struct cgroup_subsys_state *blkcg_css)
{
- struct loop_worker *worker = NULL;
- struct work_struct *work;
- struct list_head *cmd_list;
- struct cgroup_subsys_state *blkcg_css = loop_rq_blkcg_css(cmd);
gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN;
+ struct loop_worker *worker = kzalloc(sizeof(*worker), gfp);
+ struct loop_worker *worker_old;
- spin_lock(&lo->lo_work_lock);
-
- if (queue_on_root_worker(blkcg_css))
- goto queue_work;
-
- /* css->id is unique in each cgroup subsystem */
- worker = xa_load(&lo->workers, blkcg_css->id);
- if (worker)
- goto queue_work;
-
- worker = kzalloc(sizeof(*worker), gfp);
- /*
- * In the event we cannot allocate a worker, just queue on the
- * rootcg worker and issue the I/O as the rootcg
- */
if (!worker)
- goto queue_work;
+ return NULL;
worker->blkcg_css = blkcg_css;
- css_get(worker->blkcg_css);
INIT_WORK(&worker->work, loop_workfn);
INIT_LIST_HEAD(&worker->cmd_list);
- INIT_LIST_HEAD(&worker->idle_list);
worker->lo = lo;
+ spin_lock_init(&worker->lock);
+ refcount_set(&worker->refcnt, 2); /* INIT + INC */
- if (xa_err(xa_store(&lo->workers, blkcg_css->id, worker, gfp))) {
+ spin_lock(&lo->lo_work_lock);
+ /* maybe someone is storing a new worker */
+ worker_old = xa_load(&lo->workers, blkcg_css->id);
+ if (!worker_old || !refcount_inc_not_zero(&worker_old->refcnt)) {
+ if (xa_err(xa_store(&lo->workers, blkcg_css->id, worker, gfp))) {
+ kfree(worker);
+ worker = NULL;
+ } else {
+ if (!work_pending(&lo->idle_work.work))
+ schedule_delayed_work(&lo->idle_work,
+ LOOP_IDLE_WORKER_TIMEOUT);
+ css_get(worker->blkcg_css);
+ }
+ } else {
kfree(worker);
- worker = NULL;
+ worker = worker_old;
}
+ spin_unlock(&lo->lo_work_lock);
-queue_work:
- if (worker) {
+ return worker;
+}
+
+static void loop_release_worker(struct loop_worker *worker)
+{
+ css_put(worker->blkcg_css);
+ kfree_rcu(worker);
+}
+
+static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
+{
+ struct loop_worker *worker = NULL;
+ struct work_struct *work;
+ struct list_head *cmd_list;
+ struct cgroup_subsys_state *blkcg_css = loop_rq_blkcg_css(cmd);
+ spinlock_t *lock;
+
+ if (!queue_on_root_worker(blkcg_css)) {
+ int ret = 0;
+
+ rcu_read_lock();
+ /* css->id is unique in each cgroup subsystem */
+ worker = xa_load(&lo->workers, blkcg_css->id);
+ if (worker)
+ ret = refcount_inc_not_zero(&worker->refcnt);
+ rcu_read_unlock();
+
+ if (!worker || !ret)
+ worker = loop_alloc_or_get_worker(lo, blkcg_css);
/*
- * We need to remove from the idle list here while
- * holding the lock so that the idle timer doesn't
- * free the worker
+ * In the event we cannot allocate a worker, just queue on the
+ * rootcg worker and issue the I/O as the rootcg
*/
- if (!list_empty(&worker->idle_list))
- list_del_init(&worker->idle_list);
+ }
+
+ if (worker) {
work = &worker->work;
cmd_list = &worker->cmd_list;
+ lock = &worker->lock;
} else {
work = &lo->rootcg_work;
cmd_list = &lo->rootcg_cmd_list;
+ lock = &lo->lo_work_lock;
}
+
+ spin_lock(lock);
list_add_tail(&cmd->list_entry, cmd_list);
+ if (worker)
+ worker->reclaim_time = jiffies + LOOP_IDLE_WORKER_TIMEOUT;
+ spin_unlock(lock);
queue_work(lo->workqueue, work);
- spin_unlock(&lo->lo_work_lock);
}
static void loop_update_rotational(struct loop_device *lo)
@@ -1140,28 +1172,38 @@ loop_set_status_from_info(struct loop_device *lo,
return 0;
}
-static void loop_set_timer(struct loop_device *lo)
+static bool loop_need_reclaim_worker(struct loop_worker *worker)
{
- schedule_delayed_work(&lo->idle_work, LOOP_IDLE_WORKER_TIMEOUT);
+ bool reclaim = false;
+
+ spin_lock(&worker->lock);
+ if (list_empty(&worker->cmd_list) &&
+ time_is_before_jiffies(worker->reclaim_time))
+ reclaim = true;
+ else
+ reclaim = false;
+ spin_unlock(&worker->lock);
+
+ return reclaim;
}
static void __loop_free_idle_workers(struct loop_device *lo, bool force)
{
- struct loop_worker *pos, *worker;
+ struct loop_worker *worker;
+ unsigned long id;
spin_lock(&lo->lo_work_lock);
- list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
- idle_list) {
- if (!force && time_is_after_jiffies(worker->last_ran_at +
- LOOP_IDLE_WORKER_TIMEOUT))
- break;
- list_del(&worker->idle_list);
- xa_erase(&lo->workers, worker->blkcg_css->id);
- css_put(worker->blkcg_css);
- kfree(worker);
+ xa_for_each(&lo->workers, id, worker) {
+ if (!force && !loop_need_reclaim_worker(worker))
+ continue;
+
+ xa_erase(&worker->lo->workers, worker->blkcg_css->id);
+ if (refcount_dec_and_test(&worker->refcnt))
+ loop_release_worker(worker);
}
- if (!list_empty(&lo->idle_worker_list))
- loop_set_timer(lo);
+ if (!xa_empty(&lo->workers))
+ schedule_delayed_work(&lo->idle_work,
+ LOOP_IDLE_WORKER_TIMEOUT);
spin_unlock(&lo->lo_work_lock);
}
@@ -2162,42 +2204,36 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
}
static void loop_process_work(struct loop_worker *worker,
- struct list_head *cmd_list, struct loop_device *lo)
+ struct list_head *cmd_list, spinlock_t *lock)
{
int orig_flags = current->flags;
struct loop_cmd *cmd;
LIST_HEAD(list);
+ int cnt = 0;
current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
- spin_lock(&lo->lo_work_lock);
+ spin_lock(lock);
again:
list_splice_init(cmd_list, &list);
- spin_unlock(&lo->lo_work_lock);
+ spin_unlock(lock);
while (!list_empty(&list)) {
cmd = list_first_entry(&list, struct loop_cmd, list_entry);
list_del_init(&cmd->list_entry);
loop_handle_cmd(cmd);
+ cnt++;
}
- spin_lock(&lo->lo_work_lock);
+ spin_lock(lock);
if (!list_empty(cmd_list))
goto again;
-
- /*
- * We only add to the idle list if there are no pending cmds
- * *and* the worker will not run again which ensures that it
- * is safe to free any worker on the idle list
- */
- if (worker && !work_pending(&worker->work)) {
- worker->last_ran_at = jiffies;
- list_add_tail(&worker->idle_list, &lo->idle_worker_list);
- loop_set_timer(lo);
- }
- spin_unlock(&lo->lo_work_lock);
+ spin_unlock(lock);
current->flags = orig_flags;
+
+ if (worker && refcount_sub_and_test(cnt, &worker->refcnt))
+ loop_release_worker(worker);
}
static void loop_workfn(struct work_struct *work)
@@ -2212,11 +2248,11 @@ static void loop_workfn(struct work_struct *work)
if (memcg_css) {
old_memcg = set_active_memcg(
mem_cgroup_from_css(memcg_css));
- loop_process_work(worker, &worker->cmd_list, worker->lo);
+ loop_process_work(worker, &worker->cmd_list, &worker->lock);
set_active_memcg(old_memcg);
css_put(memcg_css);
} else {
- loop_process_work(worker, &worker->cmd_list, worker->lo);
+ loop_process_work(worker, &worker->cmd_list, &worker->lock);
}
kthread_associate_blkcg(NULL);
}
@@ -2225,7 +2261,7 @@ static void loop_rootcg_workfn(struct work_struct *work)
{
struct loop_device *lo =
container_of(work, struct loop_device, rootcg_work);
- loop_process_work(NULL, &lo->rootcg_cmd_list, lo);
+ loop_process_work(NULL, &lo->rootcg_cmd_list, &lo->lo_work_lock);
}
static const struct blk_mq_ops loop_mq_ops = {
--
2.31.1
* Re: [PATCH V2 7/7] loop: don't add worker into idle list
2021-07-28 8:16 ` [PATCH V2 7/7] loop: don't add worker into idle list Ming Lei
@ 2021-07-30 16:52 ` Dan Schatzberg
0 siblings, 0 replies; 9+ messages in thread
From: Dan Schatzberg @ 2021-07-30 16:52 UTC (permalink / raw)
To: Ming Lei; +Cc: Jens Axboe, linux-kernel, linux-block, Christoph Hellwig
On Wed, Jul 28, 2021 at 04:16:38PM +0800, Ming Lei wrote:
> We can retrieve any worker via the xarray, so there is no need to add
> workers to an idle list. Meanwhile, reduce ->lo_work_lock coverage: the
> IO path no longer needs it except when adding a worker to or deleting
> one from the xarray.
>
> Also replace ->last_ran_at with ->reclaim_time, which is set when a
> loop command is added to worker->cmd_list. A worker is reclaimed once
> it has expired and has no pending commands.
>
> Cc: Dan Schatzberg <schatzberg.dan@gmail.com>
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
> drivers/block/loop.c | 172 ++++++++++++++++++++++++++-----------------
> 1 file changed, 104 insertions(+), 68 deletions(-)
>
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> index 9b22e5469ed3..e4b292b2d8f4 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -920,10 +920,11 @@ static void loop_config_discard(struct loop_device *lo)
> struct loop_worker {
> struct work_struct work;
> struct list_head cmd_list;
> - struct list_head idle_list;
> struct loop_device *lo;
> struct cgroup_subsys_state *blkcg_css;
> - unsigned long last_ran_at;
> + unsigned long reclaim_time;
> + spinlock_t lock;
> + refcount_t refcnt;
> };
>
> static void loop_workfn(struct work_struct *work);
> @@ -963,62 +964,93 @@ static struct cgroup_subsys_state *loop_rq_get_memcg_css(
> return NULL;
> }
>
> -static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
> +static struct loop_worker *loop_alloc_or_get_worker(struct loop_device *lo,
> + struct cgroup_subsys_state *blkcg_css)
> {
> - struct loop_worker *worker = NULL;
> - struct work_struct *work;
> - struct list_head *cmd_list;
> - struct cgroup_subsys_state *blkcg_css = loop_rq_blkcg_css(cmd);
> gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN;
> + struct loop_worker *worker = kzalloc(sizeof(*worker), gfp);
> + struct loop_worker *worker_old;
>
> - spin_lock(&lo->lo_work_lock);
> -
> - if (queue_on_root_worker(blkcg_css))
> - goto queue_work;
> -
> - /* css->id is unique in each cgroup subsystem */
> - worker = xa_load(&lo->workers, blkcg_css->id);
> - if (worker)
> - goto queue_work;
> -
> - worker = kzalloc(sizeof(*worker), gfp);
> - /*
> - * In the event we cannot allocate a worker, just queue on the
> - * rootcg worker and issue the I/O as the rootcg
> - */
> if (!worker)
> - goto queue_work;
> + return NULL;
>
> worker->blkcg_css = blkcg_css;
> - css_get(worker->blkcg_css);
> INIT_WORK(&worker->work, loop_workfn);
> INIT_LIST_HEAD(&worker->cmd_list);
> - INIT_LIST_HEAD(&worker->idle_list);
> worker->lo = lo;
> + spin_lock_init(&worker->lock);
> + refcount_set(&worker->refcnt, 2); /* INIT + INC */
>
> - if (xa_err(xa_store(&lo->workers, blkcg_css->id, worker, gfp))) {
> + spin_lock(&lo->lo_work_lock);
> + /* maybe someone is storing a new worker */
> + worker_old = xa_load(&lo->workers, blkcg_css->id);
> + if (!worker_old || !refcount_inc_not_zero(&worker_old->refcnt)) {
> + if (xa_err(xa_store(&lo->workers, blkcg_css->id, worker, gfp))) {
> + kfree(worker);
> + worker = NULL;
> + } else {
> + if (!work_pending(&lo->idle_work.work))
> + schedule_delayed_work(&lo->idle_work,
> + LOOP_IDLE_WORKER_TIMEOUT);
> + css_get(worker->blkcg_css);
> + }
> + } else {
> kfree(worker);
> - worker = NULL;
> + worker = worker_old;
> }
> + spin_unlock(&lo->lo_work_lock);
>
> -queue_work:
> - if (worker) {
> + return worker;
> +}
> +
> +static void loop_release_worker(struct loop_worker *worker)
> +{
> + css_put(worker->blkcg_css);
> + kfree_rcu(worker);
> +}
> +
> +static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
> +{
> + struct loop_worker *worker = NULL;
> + struct work_struct *work;
> + struct list_head *cmd_list;
> + struct cgroup_subsys_state *blkcg_css = loop_rq_blkcg_css(cmd);
> + spinlock_t *lock;
> +
> + if (!queue_on_root_worker(blkcg_css)) {
> + int ret = 0;
> +
> + rcu_read_lock();
> + /* css->id is unique in each cgroup subsystem */
> + worker = xa_load(&lo->workers, blkcg_css->id);
> + if (worker)
> + ret = refcount_inc_not_zero(&worker->refcnt);
> + rcu_read_unlock();
> +
> + if (!worker || !ret)
> + worker = loop_alloc_or_get_worker(lo, blkcg_css);
> /*
> - * We need to remove from the idle list here while
> - * holding the lock so that the idle timer doesn't
> - * free the worker
> + * In the event we cannot allocate a worker, just queue on the
> + * rootcg worker and issue the I/O as the rootcg
> */
> - if (!list_empty(&worker->idle_list))
> - list_del_init(&worker->idle_list);
> + }
> +
> + if (worker) {
> work = &worker->work;
> cmd_list = &worker->cmd_list;
> + lock = &worker->lock;
> } else {
> work = &lo->rootcg_work;
> cmd_list = &lo->rootcg_cmd_list;
> + lock = &lo->lo_work_lock;
> }
> +
> + spin_lock(lock);
> list_add_tail(&cmd->list_entry, cmd_list);
> + if (worker)
> + worker->reclaim_time = jiffies + LOOP_IDLE_WORKER_TIMEOUT;
> + spin_unlock(lock);
> queue_work(lo->workqueue, work);
> - spin_unlock(&lo->lo_work_lock);
> }
>
> static void loop_update_rotational(struct loop_device *lo)
> @@ -1140,28 +1172,38 @@ loop_set_status_from_info(struct loop_device *lo,
> return 0;
> }
>
> -static void loop_set_timer(struct loop_device *lo)
> +static bool loop_need_reclaim_worker(struct loop_worker *worker)
> {
> - schedule_delayed_work(&lo->idle_work, LOOP_IDLE_WORKER_TIMEOUT);
> + bool reclaim = false;
You don't need both this initialization and the else block below. I
think it's easier to read if you just remove one of them.
> +
> + spin_lock(&worker->lock);
> + if (list_empty(&worker->cmd_list) &&
> + time_is_before_jiffies(worker->reclaim_time))
> + reclaim = true;
> + else
> + reclaim = false;
> + spin_unlock(&worker->lock);
> +
> + return reclaim;
> }
>
> static void __loop_free_idle_workers(struct loop_device *lo, bool force)
> {
> - struct loop_worker *pos, *worker;
> + struct loop_worker *worker;
> + unsigned long id;
>
> spin_lock(&lo->lo_work_lock);
> - list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
> - idle_list) {
> - if (!force && time_is_after_jiffies(worker->last_ran_at +
> - LOOP_IDLE_WORKER_TIMEOUT))
> - break;
> - list_del(&worker->idle_list);
> - xa_erase(&lo->workers, worker->blkcg_css->id);
> - css_put(worker->blkcg_css);
> - kfree(worker);
> + xa_for_each(&lo->workers, id, worker) {
> + if (!force && !loop_need_reclaim_worker(worker))
> + continue;
> +
> + xa_erase(&worker->lo->workers, worker->blkcg_css->id);
> + if (refcount_dec_and_test(&worker->refcnt))
> + loop_release_worker(worker);
> }
> - if (!list_empty(&lo->idle_worker_list))
> - loop_set_timer(lo);
> + if (!xa_empty(&lo->workers))
> + schedule_delayed_work(&lo->idle_work,
> + LOOP_IDLE_WORKER_TIMEOUT);
> spin_unlock(&lo->lo_work_lock);
> }
>
> @@ -2162,42 +2204,36 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
> }
>
> static void loop_process_work(struct loop_worker *worker,
> - struct list_head *cmd_list, struct loop_device *lo)
> + struct list_head *cmd_list, spinlock_t *lock)
> {
> int orig_flags = current->flags;
> struct loop_cmd *cmd;
> LIST_HEAD(list);
> + int cnt = 0;
>
> current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
>
> - spin_lock(&lo->lo_work_lock);
> + spin_lock(lock);
> again:
> list_splice_init(cmd_list, &list);
> - spin_unlock(&lo->lo_work_lock);
> + spin_unlock(lock);
>
> while (!list_empty(&list)) {
> cmd = list_first_entry(&list, struct loop_cmd, list_entry);
> list_del_init(&cmd->list_entry);
>
> loop_handle_cmd(cmd);
> + cnt++;
> }
>
> - spin_lock(&lo->lo_work_lock);
> + spin_lock(lock);
> if (!list_empty(cmd_list))
> goto again;
> -
> - /*
> - * We only add to the idle list if there are no pending cmds
> - * *and* the worker will not run again which ensures that it
> - * is safe to free any worker on the idle list
> - */
> - if (worker && !work_pending(&worker->work)) {
> - worker->last_ran_at = jiffies;
> - list_add_tail(&worker->idle_list, &lo->idle_worker_list);
> - loop_set_timer(lo);
> - }
> - spin_unlock(&lo->lo_work_lock);
> + spin_unlock(lock);
> current->flags = orig_flags;
> +
> + if (worker && refcount_sub_and_test(cnt, &worker->refcnt))
> + loop_release_worker(worker);
> }
>
> static void loop_workfn(struct work_struct *work)
> @@ -2212,11 +2248,11 @@ static void loop_workfn(struct work_struct *work)
> if (memcg_css) {
> old_memcg = set_active_memcg(
> mem_cgroup_from_css(memcg_css));
> - loop_process_work(worker, &worker->cmd_list, worker->lo);
> + loop_process_work(worker, &worker->cmd_list, &worker->lock);
> set_active_memcg(old_memcg);
> css_put(memcg_css);
> } else {
> - loop_process_work(worker, &worker->cmd_list, worker->lo);
> + loop_process_work(worker, &worker->cmd_list, &worker->lock);
> }
> kthread_associate_blkcg(NULL);
> }
> @@ -2225,7 +2261,7 @@ static void loop_rootcg_workfn(struct work_struct *work)
> {
> struct loop_device *lo =
> container_of(work, struct loop_device, rootcg_work);
> - loop_process_work(NULL, &lo->rootcg_cmd_list, lo);
> + loop_process_work(NULL, &lo->rootcg_cmd_list, &lo->lo_work_lock);
> }
>
> static const struct blk_mq_ops loop_mq_ops = {
> --
> 2.31.1
>
This is all great.
Acked-by: Dan Schatzberg <schatzberg.dan@gmail.com>
Thread overview: 9+ messages
2021-07-28 8:16 [PATCH V2 0/7] loop: cleanup charging io to mem/blkcg Ming Lei
2021-07-28 8:16 ` [PATCH V2 1/7] mm: memcontrol: add helper of memcg_get_e_css Ming Lei
2021-07-28 8:16 ` [PATCH V2 2/7] loop: clean up blkcg association Ming Lei
2021-07-28 8:16 ` [PATCH V2 3/7] loop: convert timer for monitoring idle worker into dwork Ming Lei
2021-07-28 8:16 ` [PATCH V2 4/7] loop: add __loop_free_idle_workers() for covering freeing workers in clearing FD Ming Lei
2021-07-28 8:16 ` [PATCH V2 5/7] loop: improve loop_process_work Ming Lei
2021-07-28 8:16 ` [PATCH V2 6/7] loop: use xarray to store workers Ming Lei
2021-07-28 8:16 ` [PATCH V2 7/7] loop: don't add worker into idle list Ming Lei
2021-07-30 16:52 ` Dan Schatzberg