LKML Archive on lore.kernel.org
* [v2] ceph: using POOL FULL flag instead of OSDMAP FULL flag
From: Yanhu Cao @ 2020-03-03  9:33 UTC
To: jlayton
Cc: sage, idryomov, davem, kuba, ceph-devel, linux-kernel, netdev, Yanhu Cao

CEPH_OSDMAP_FULL/NEARFULL have been deprecated since mimic, so they
do not work well on newer versions; add checks of the per-pool
FULL/NEARFULL flags to handle this.

Signed-off-by: Yanhu Cao <gmayyyha@gmail.com>
---
 fs/ceph/file.c                  |  9 +++++++--
 include/linux/ceph/osd_client.h |  2 ++
 include/linux/ceph/osdmap.h     |  3 ++-
 net/ceph/osd_client.c           | 23 +++++++++++++----------
 4 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 7e0190b1f821..84ec44f9d77a 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1482,7 +1482,9 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	}
 
 	/* FIXME: not complete since it doesn't account for being at quota */
-	if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_FULL)) {
+	if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_FULL) ||
+	    pool_flag(&fsc->client->osdc, ci->i_layout.pool_id,
+		      CEPH_POOL_FLAG_FULL)) {
 		err = -ENOSPC;
 		goto out;
 	}
@@ -1575,7 +1577,10 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	}
 
 	if (written >= 0) {
-		if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_NEARFULL))
+		if (ceph_osdmap_flag(&fsc->client->osdc,
+				     CEPH_OSDMAP_NEARFULL) ||
+		    pool_flag(&fsc->client->osdc, ci->i_layout.pool_id,
+			      CEPH_POOL_FLAG_NEARFULL))
 			iocb->ki_flags |= IOCB_DSYNC;
 		written = generic_write_sync(iocb, written);
 	}
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 5a62dbd3f4c2..be9007b93862 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -375,6 +375,8 @@ static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag)
 	return osdc->osdmap->flags & flag;
 }
 
+bool pool_flag(struct ceph_osd_client *osdc, s64 pool_id, int flag);
+
 extern int ceph_osdc_setup(void);
 extern void ceph_osdc_cleanup(void);
 
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index e081b56f1c1d..88faacc11f55 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -36,7 +36,8 @@ int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs);
 
 #define CEPH_POOL_FLAG_HASHPSPOOL	(1ULL << 0) /* hash pg seed and pool id
						       together */
-#define CEPH_POOL_FLAG_FULL		(1ULL << 1) /* pool is full */
+#define CEPH_POOL_FLAG_FULL		(1ULL << 1) /* pool is full */
+#define CEPH_POOL_FLAG_NEARFULL		(1ULL << 11) /* pool is nearfull */
 
 struct ceph_pg_pool_info {
 	struct rb_node node;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index b68b376d8c2f..9ad2b96c3e78 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1447,9 +1447,9 @@ static void unlink_request(struct ceph_osd *osd, struct ceph_osd_request *req)
 	atomic_dec(&osd->o_osdc->num_homeless);
 }
 
-static bool __pool_full(struct ceph_pg_pool_info *pi)
+static bool __pool_flag(struct ceph_pg_pool_info *pi, int flag)
 {
-	return pi->flags & CEPH_POOL_FLAG_FULL;
+	return pi->flags & flag;
 }
 
 static bool have_pool_full(struct ceph_osd_client *osdc)
@@ -1460,14 +1460,14 @@ static bool have_pool_full(struct ceph_osd_client *osdc)
 		struct ceph_pg_pool_info *pi =
 		    rb_entry(n, struct ceph_pg_pool_info, node);
 
-		if (__pool_full(pi))
+		if (__pool_flag(pi, CEPH_POOL_FLAG_FULL))
 			return true;
 	}
 
 	return false;
 }
 
-static bool pool_full(struct ceph_osd_client *osdc, s64 pool_id)
+bool pool_flag(struct ceph_osd_client *osdc, s64 pool_id, int flag)
 {
 	struct ceph_pg_pool_info *pi;
 
@@ -1475,8 +1475,10 @@ static bool pool_full(struct ceph_osd_client *osdc, s64 pool_id)
 	if (!pi)
 		return false;
 
-	return __pool_full(pi);
+	return __pool_flag(pi, flag);
 }
+EXPORT_SYMBOL(pool_flag);
+
 
 /*
  * Returns whether a request should be blocked from being sent
@@ -1489,7 +1491,7 @@ static bool target_should_be_paused(struct ceph_osd_client *osdc,
 	bool pauserd = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD);
 	bool pausewr = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR) ||
 		       ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
-		       __pool_full(pi);
+		       __pool_flag(pi, CEPH_POOL_FLAG_FULL);
 
 	WARN_ON(pi->id != t->target_oloc.pool);
 	return ((t->flags & CEPH_OSD_FLAG_READ) && pauserd) ||
@@ -2320,7 +2322,8 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked)
 	    !(req->r_flags & (CEPH_OSD_FLAG_FULL_TRY |
			      CEPH_OSD_FLAG_FULL_FORCE)) &&
 	    (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
-	     pool_full(osdc, req->r_t.base_oloc.pool))) {
+	     pool_flag(osdc, req->r_t.base_oloc.pool,
+		       CEPH_POOL_FLAG_FULL))) {
 		dout("req %p full/pool_full\n", req);
 		if (ceph_test_opt(osdc->client, ABORT_ON_FULL)) {
 			err = -ENOSPC;
@@ -2539,7 +2542,7 @@ static int abort_on_full_fn(struct ceph_osd_request *req, void *arg)
 
 	if ((req->r_flags & CEPH_OSD_FLAG_WRITE) &&
 	    (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
-	     pool_full(osdc, req->r_t.base_oloc.pool))) {
+	     pool_flag(osdc, req->r_t.base_oloc.pool, CEPH_POOL_FLAG_FULL))) {
 		if (!*victims) {
 			update_epoch_barrier(osdc, osdc->osdmap->epoch);
 			*victims = true;
@@ -3707,7 +3710,7 @@ static void set_pool_was_full(struct ceph_osd_client *osdc)
 		struct ceph_pg_pool_info *pi =
 		    rb_entry(n, struct ceph_pg_pool_info, node);
 
-		pi->was_full = __pool_full(pi);
+		pi->was_full = __pool_flag(pi, CEPH_POOL_FLAG_FULL);
 	}
 }
 
@@ -3719,7 +3722,7 @@ static bool pool_cleared_full(struct ceph_osd_client *osdc, s64 pool_id)
 	if (!pi)
 		return false;
 
-	return pi->was_full && !__pool_full(pi);
+	return pi->was_full && !__pool_flag(pi, CEPH_POOL_FLAG_FULL);
 }
 
 static enum calc_target_result
-- 
2.21.1
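For context: the CEPH_POOL_FLAG_* constants in the patch mirror the pool flag bits of Ceph's userspace pg_pool_t in src/osd/osd_types.h, which is why the new NEARFULL constant lands on bit 11 rather than on the next free kernel bit. The excerpt below is a hedged reconstruction of the relevant mimic-era entries; only FLAG_FULL (1<<1) and FLAG_FULL_QUOTA (1<<10) are confirmed later in this thread, the rest is recalled and should be checked against the real header.

	/* Reconstructed excerpt of pg_pool_t flags (src/osd/osd_types.h,
	 * mimic era). Values other than FLAG_FULL and FLAG_FULL_QUOTA are
	 * recalled, not quoted from this thread. */
	enum {
		FLAG_HASHPSPOOL = 1 << 0,  /* hash pg seed and pool id together */
		FLAG_FULL       = 1 << 1,  /* pool is full */
		/* ... bits 2-9: nodelete, nopgchange, noscrub, etc. ... */
		FLAG_FULL_QUOTA = 1 << 10, /* out of quota; sets FLAG_FULL too */
		FLAG_NEARFULL   = 1 << 11, /* pool is nearfull */
	};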
* Re: [v2] ceph: using POOL FULL flag instead of OSDMAP FULL flag
From: Ilya Dryomov @ 2020-03-09 20:43 UTC
To: Yanhu Cao
Cc: Jeff Layton, Sage Weil, David S. Miller, kuba, Ceph Development, LKML, netdev

[-- Attachment #1: Type: text/plain, Size: 7255 bytes --]

On Tue, Mar 3, 2020 at 10:33 AM Yanhu Cao <gmayyyha@gmail.com> wrote:
>
> CEPH_OSDMAP_FULL/NEARFULL have been deprecated since mimic, so they
> do not work well on newer versions; add checks of the per-pool
> FULL/NEARFULL flags to handle this.
>
> Signed-off-by: Yanhu Cao <gmayyyha@gmail.com>
>
> [... full v2 patch quoted above, trimmed ...]

Hi Yanhu,

Sorry for a late reply.

This adds some unnecessary churn and also exposes a helper that
must be called under osdc->lock without making that obvious.  How
about the attached instead?

ceph_pg_pool_flags() takes osdmap instead of osdc, making it clear
that the caller is responsible for keeping the map stable.

Thanks,

                Ilya

[-- Attachment #2: full-nearfull.patch --]
[-- Type: text/x-patch, Size: 3026 bytes --]

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ba46ba740628..c8d84e90a371 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1663,10 +1663,13 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+	struct ceph_osd_client *osdc = &fsc->client->osdc;
 	struct ceph_cap_flush *prealloc_cf;
 	ssize_t count, written = 0;
 	int err, want, got;
 	bool direct_lock = false;
+	u32 map_flags;
+	u64 pool_flags;
 	loff_t pos;
 	loff_t limit = max(i_size_read(inode), fsc->max_file_size);
 
@@ -1730,7 +1733,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	}
 
 	/* FIXME: not complete since it doesn't account for being at quota */
-	if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_FULL)) {
+	down_read(&osdc->lock);
+	map_flags = osdc->osdmap->flags;
+	pool_flags = ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id);
+	up_read(&osdc->lock);
+	if ((map_flags & CEPH_OSDMAP_FULL) ||
+	    (pool_flags & CEPH_POOL_FLAG_FULL)) {
 		err = -ENOSPC;
 		goto out;
 	}
@@ -1823,7 +1831,8 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	}
 
 	if (written >= 0) {
-		if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_NEARFULL))
+		if ((map_flags & CEPH_OSDMAP_NEARFULL) ||
+		    (pool_flags & CEPH_POOL_FLAG_NEARFULL))
 			iocb->ki_flags |= IOCB_DSYNC;
 		written = generic_write_sync(iocb, written);
 	}
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index e081b56f1c1d..651e1b967608 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -37,6 +37,7 @@ int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs);
 #define CEPH_POOL_FLAG_HASHPSPOOL	(1ULL << 0) /* hash pg seed and pool id
						       together */
 #define CEPH_POOL_FLAG_FULL		(1ULL << 1) /* pool is full */
+#define CEPH_POOL_FLAG_NEARFULL		(1ULL << 11) /* pool is nearfull */
 
 struct ceph_pg_pool_info {
 	struct rb_node node;
@@ -304,5 +305,6 @@ extern struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map,
 extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
 extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
+u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id);
 
 #endif
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 4e0de14f80bb..2a6e63a8edbe 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -710,6 +710,15 @@ int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name)
 }
 EXPORT_SYMBOL(ceph_pg_poolid_by_name);
 
+u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id)
+{
+	struct ceph_pg_pool_info *pi;
+
+	pi = __lookup_pg_pool(&map->pg_pools, id);
+	return pi ? pi->flags : 0;
+}
+EXPORT_SYMBOL(ceph_pg_pool_flags);
+
 static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
 {
 	rb_erase(&pi->node, root);
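The key design point in the attached patch is that ceph_write_iter() takes one snapshot of both flag words under osdc->lock and reuses it for the early FULL check and the later NEARFULL check; since these checks are advisory (the OSDs enforce fullness authoritatively), one racy snapshot is enough. A condensed sketch of that flow, with the write itself and most error handling elided:

	/* Condensed from the attached patch: snapshot once, test twice.
	 * map_flags/pool_flags are plain copies, so they remain valid after
	 * osdc->lock is dropped, unlike a pointer into the osdmap. */
	u32 map_flags;
	u64 pool_flags;

	down_read(&osdc->lock);
	map_flags = osdc->osdmap->flags;
	pool_flags = ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id);
	up_read(&osdc->lock);

	if ((map_flags & CEPH_OSDMAP_FULL) ||
	    (pool_flags & CEPH_POOL_FLAG_FULL))
		return -ENOSPC;			/* refuse the write up front */

	/* ... perform the buffered/direct write ... */

	if ((map_flags & CEPH_OSDMAP_NEARFULL) ||
	    (pool_flags & CEPH_POOL_FLAG_NEARFULL))
		iocb->ki_flags |= IOCB_DSYNC;	/* flush eagerly when nearly full */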
* Re: [v2] ceph: using POOL FULL flag instead of OSDMAP FULL flag
From: Yanhu Cao @ 2020-03-11  9:55 UTC
To: Ilya Dryomov
Cc: Jeff Layton, Sage Weil, David S. Miller, kuba, Ceph Development, LKML, netdev

On Tue, Mar 10, 2020 at 4:43 AM Ilya Dryomov <idryomov@gmail.com> wrote:
>
> [... quoted patch trimmed ...]
>
> Hi Yanhu,
>
> Sorry for a late reply.
>
> This adds some unnecessary churn and also exposes a helper that
> must be called under osdc->lock without making that obvious.  How
> about the attached instead?
>
> ceph_pg_pool_flags() takes osdmap instead of osdc, making it clear
> that the caller is responsible for keeping the map stable.
>
> Thanks,
>
>                 Ilya

net/ceph/osdmap.c
--------------------------
bool ceph_pg_pool_flags(struct ceph_osdmap *map, s64 pool_id, int flag)
{
	struct ceph_pg_pool_info *pi;

	/* CEPH_OSDMAP_FULL|CEPH_OSDMAP_NEARFULL deprecated since mimic */
	if (flag & (CEPH_POOL_FLAG_FULL|CEPH_POOL_FLAG_NEARFULL))
		if (map->flags & (CEPH_OSDMAP_FULL|CEPH_OSDMAP_NEARFULL))
			return true;

	pi = ceph_pg_pool_by_id(map, pool_id);
	if (!pi)
		return false;

	return pi->flags & flag;
}

fs/ceph/file.c
-----------------
ceph_write_iter() {
	...
	down_read(&osdc->lock);
	if (ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id,
			       CEPH_POOL_FLAG_FULL|CEPH_POOL_FLAG_FULL_QUOTA)) {
		err = -ENOSPC;
		up_read(&osdc->lock);
		goto out;
	}
	up_read(&osdc->lock);
	...
	down_read(&osdc->lock);
	if (ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id,
			       CEPH_POOL_FLAG_NEARFULL))
		iocb->ki_flags |= IOCB_DSYNC;
	up_read(&osdc->lock);
	...
}

How about this?

Thanks.
* Re: [v2] ceph: using POOL FULL flag instead of OSDMAP FULL flag
From: Ilya Dryomov @ 2020-03-11 13:41 UTC
To: Yanhu Cao
Cc: Jeff Layton, Sage Weil, David S. Miller, kuba, Ceph Development, LKML, netdev

On Wed, Mar 11, 2020 at 10:55 AM Yanhu Cao <gmayyyha@gmail.com> wrote:
>
> [... earlier quotes trimmed ...]
>
> net/ceph/osdmap.c
> --------------------------
> bool ceph_pg_pool_flags(struct ceph_osdmap *map, s64 pool_id, int flag)
> {
>         struct ceph_pg_pool_info *pi;
>
>         /* CEPH_OSDMAP_FULL|CEPH_OSDMAP_NEARFULL deprecated since mimic */
>         if (flag & (CEPH_POOL_FLAG_FULL|CEPH_POOL_FLAG_NEARFULL))
>                 if (map->flags & (CEPH_OSDMAP_FULL|CEPH_OSDMAP_NEARFULL))
>                         return true;
>
>         pi = ceph_pg_pool_by_id(map, pool_id);
>         if (!pi)
>                 return false;
>
>         return pi->flags & flag;
> }
>
> [... proposed ceph_write_iter() changes trimmed ...]
>
> How about this?

Well, this takes osdc->lock and looks up ceph_pg_pool_info twice.
Given that these checks are inherently racy, I think doing it once
at the top makes more sense.

Also, I don't think this does what you intended it to do.  Your
ceph_pg_pool_flags(..., CEPH_POOL_FLAG_FULL) returns true even if
the map only has CEPH_OSDMAP_NEARFULL, triggering early ENOSPC.

Checking CEPH_POOL_FLAG_FULL_QUOTA is not necessary, because it is
set together with CEPH_POOL_FLAG_FULL:

src/osd/osd_types.h:
1199   FLAG_FULL_QUOTA = 1<<10, // pool is currently running out of
                                // quota, will set FLAG_FULL too

Thanks,

                Ilya
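To make the cross-match concrete: with the proposed helper, a caller asking only about FULL on a map that carries only the deprecated NEARFULL bit still gets true. Below is a standalone sketch demonstrating this; the pool flag values come from the patches above, while the OSDMAP bit values are assumed (recalled from include/linux/ceph/rados.h) and only need to be distinct for the demonstration to hold.

	#include <stdbool.h>
	#include <stdio.h>

	/* Pool flag bits as in the patch; OSDMAP bits assumed. */
	#define CEPH_OSDMAP_NEARFULL     (1 << 0)
	#define CEPH_OSDMAP_FULL         (1 << 1)
	#define CEPH_POOL_FLAG_FULL      (1ULL << 1)
	#define CEPH_POOL_FLAG_NEARFULL  (1ULL << 11)

	/* The map-flag fallback from the proposed helper, pool lookup elided. */
	static bool proposed_check(unsigned int map_flags, unsigned long long flag)
	{
		if (flag & (CEPH_POOL_FLAG_FULL | CEPH_POOL_FLAG_NEARFULL))
			if (map_flags & (CEPH_OSDMAP_FULL | CEPH_OSDMAP_NEARFULL))
				return true;
		return false;
	}

	int main(void)
	{
		/* Caller asks "is the pool FULL?" while the map is merely
		 * NEARFULL: prints 1, i.e. ceph_write_iter() would fail with
		 * ENOSPC too early. */
		printf("%d\n", proposed_check(CEPH_OSDMAP_NEARFULL,
					      CEPH_POOL_FLAG_FULL));
		return 0;
	}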
* Re: [v2] ceph: using POOL FULL flag instead of OSDMAP FULL flag
From: Yanhu Cao @ 2020-03-12  8:40 UTC
To: Ilya Dryomov
Cc: Jeff Layton, Sage Weil, David S. Miller, kuba, Ceph Development, LKML, netdev

On Wed, Mar 11, 2020 at 9:41 PM Ilya Dryomov <idryomov@gmail.com> wrote:
>
> [... earlier quotes trimmed ...]
>
> Well, this takes osdc->lock and looks up ceph_pg_pool_info twice.
> Given that these checks are inherently racy, I think doing it once
> at the top makes more sense.

It could be modified as follows.

ceph_write_iter() {
	...
	down_read(&osdc->lock);
	pi = ceph_pg_pool_by_id(osdc->osdmap, ci->i_layout.pool_id);
	if (!pi) {
		err = -ENOENT;
		up_read(&osdc->lock);
		goto out;
	}
	up_read(&osdc->lock);
	...
}

> Also, I don't think this does what you intended it to do.  Your
> ceph_pg_pool_flags(..., CEPH_POOL_FLAG_FULL) returns true even if
> the map only has CEPH_OSDMAP_NEARFULL, triggering early ENOSPC.

Ah... my mistake.  Then check the OSDMAP and POOL flags separately:

if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
    ceph_pg_pool_flags(pi, CEPH_POOL_FLAG_FULL)) {
	err = -ENOSPC;
	goto out;
}

include/linux/ceph/osdmap.h
--------------------------------------
static inline bool ceph_pg_pool_flags(struct ceph_pg_pool_info *pi, int flag)
{
	return pi->flags & flag;
}

Thanks.
* Re: [v2] ceph: using POOL FULL flag instead of OSDMAP FULL flag
From: Ilya Dryomov @ 2020-03-12 14:33 UTC
To: Yanhu Cao
Cc: Jeff Layton, Sage Weil, David S. Miller, kuba, Ceph Development, LKML, netdev

On Thu, Mar 12, 2020 at 9:40 AM Yanhu Cao <gmayyyha@gmail.com> wrote:
>
> [... earlier quotes trimmed ...]
>
> It could be modified as follows.
>
> ceph_write_iter() {
>         ...
>         down_read(&osdc->lock);
>         pi = ceph_pg_pool_by_id(osdc->osdmap, ci->i_layout.pool_id);
>         if (!pi) {
>                 err = -ENOENT;
>                 up_read(&osdc->lock);
>                 goto out;
>         }
>         up_read(&osdc->lock);
>         ...
> }

No, this won't work because as soon as you release osdc->lock, pi
may get invalidated.

> Ah... my mistake.  Then check the OSDMAP and POOL flags separately:
>
> if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
>     ceph_pg_pool_flags(pi, CEPH_POOL_FLAG_FULL)) {

... and you may reference invalid memory here.

I'll go ahead with my version.

Thanks,

                Ilya
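Ilya's objection is about object lifetime: ceph_pg_pool_by_id() returns a pointer into the current osdmap's pool tree, and an osdmap update can free that tree the moment osdc->lock is released. A hedged sketch contrasting the two access patterns; the function names come from the patches above, but the wrapper functions themselves are illustrative:

	/* Unsafe: the pool-info pointer outlives the lock that pins the osdmap. */
	static bool pool_full_unsafe(struct ceph_osd_client *osdc, u64 pool_id)
	{
		struct ceph_pg_pool_info *pi;

		down_read(&osdc->lock);
		pi = ceph_pg_pool_by_id(osdc->osdmap, pool_id);
		up_read(&osdc->lock);

		/* pi may point into a freed osdmap by now: use-after-free */
		return pi && (pi->flags & CEPH_POOL_FLAG_FULL);
	}

	/* Safe: copy the flag word out while the map is pinned (Ilya's approach). */
	static bool pool_full_safe(struct ceph_osd_client *osdc, u64 pool_id)
	{
		u64 pool_flags;

		down_read(&osdc->lock);
		pool_flags = ceph_pg_pool_flags(osdc->osdmap, pool_id);
		up_read(&osdc->lock);

		/* the snapshot stays valid regardless of later map updates */
		return pool_flags & CEPH_POOL_FLAG_FULL;
	}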