LKML Archive on lore.kernel.org
* [v2] ceph: using POOL FULL flag instead of OSDMAP FULL flag
From: Yanhu Cao @ 2020-03-03 9:33 UTC
To: jlayton
Cc: sage, idryomov, davem, kuba, ceph-devel, linux-kernel, netdev, Yanhu Cao
CEPH_OSDMAP_FULL/NEARFULL has been deprecated since mimic, so it
no longer works well against newer clusters. Check the per-pool
POOL_FLAG_FULL/NEARFULL flags as well to handle this.
Signed-off-by: Yanhu Cao <gmayyyha@gmail.com>
---
 fs/ceph/file.c                  |  9 +++++++--
 include/linux/ceph/osd_client.h |  2 ++
 include/linux/ceph/osdmap.h     |  3 ++-
 net/ceph/osd_client.c           | 23 +++++++++++++----------
4 files changed, 24 insertions(+), 13 deletions(-)
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 7e0190b1f821..84ec44f9d77a 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1482,7 +1482,9 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
}
/* FIXME: not complete since it doesn't account for being at quota */
- if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_FULL)) {
+ if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_FULL) ||
+ pool_flag(&fsc->client->osdc, ci->i_layout.pool_id,
+ CEPH_POOL_FLAG_FULL)) {
err = -ENOSPC;
goto out;
}
@@ -1575,7 +1577,10 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
}
if (written >= 0) {
- if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_NEARFULL))
+ if (ceph_osdmap_flag(&fsc->client->osdc,
+ CEPH_OSDMAP_NEARFULL) ||
+ pool_flag(&fsc->client->osdc, ci->i_layout.pool_id,
+ CEPH_POOL_FLAG_NEARFULL))
iocb->ki_flags |= IOCB_DSYNC;
written = generic_write_sync(iocb, written);
}
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 5a62dbd3f4c2..be9007b93862 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -375,6 +375,8 @@ static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag)
return osdc->osdmap->flags & flag;
}
+bool pool_flag(struct ceph_osd_client *osdc, s64 pool_id, int flag);
+
extern int ceph_osdc_setup(void);
extern void ceph_osdc_cleanup(void);
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index e081b56f1c1d..88faacc11f55 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -36,7 +36,8 @@ int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs);
#define CEPH_POOL_FLAG_HASHPSPOOL (1ULL << 0) /* hash pg seed and pool id
together */
-#define CEPH_POOL_FLAG_FULL (1ULL << 1) /* pool is full */
+#define CEPH_POOL_FLAG_FULL (1ULL << 1) /* pool is full */
+#define CEPH_POOL_FLAG_NEARFULL (1ULL << 11) /* pool is nearfull */
struct ceph_pg_pool_info {
struct rb_node node;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index b68b376d8c2f..9ad2b96c3e78 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1447,9 +1447,9 @@ static void unlink_request(struct ceph_osd *osd, struct ceph_osd_request *req)
atomic_dec(&osd->o_osdc->num_homeless);
}
-static bool __pool_full(struct ceph_pg_pool_info *pi)
+static bool __pool_flag(struct ceph_pg_pool_info *pi, int flag)
{
- return pi->flags & CEPH_POOL_FLAG_FULL;
+ return pi->flags & flag;
}
static bool have_pool_full(struct ceph_osd_client *osdc)
@@ -1460,14 +1460,14 @@ static bool have_pool_full(struct ceph_osd_client *osdc)
struct ceph_pg_pool_info *pi =
rb_entry(n, struct ceph_pg_pool_info, node);
- if (__pool_full(pi))
+ if (__pool_flag(pi, CEPH_POOL_FLAG_FULL))
return true;
}
return false;
}
-static bool pool_full(struct ceph_osd_client *osdc, s64 pool_id)
+bool pool_flag(struct ceph_osd_client *osdc, s64 pool_id, int flag)
{
struct ceph_pg_pool_info *pi;
@@ -1475,8 +1475,10 @@ static bool pool_full(struct ceph_osd_client *osdc, s64 pool_id)
if (!pi)
return false;
- return __pool_full(pi);
+ return __pool_flag(pi, flag);
}
+EXPORT_SYMBOL(pool_flag);
+
/*
* Returns whether a request should be blocked from being sent
@@ -1489,7 +1491,7 @@ static bool target_should_be_paused(struct ceph_osd_client *osdc,
bool pauserd = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD);
bool pausewr = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR) ||
ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
- __pool_full(pi);
+ __pool_flag(pi, CEPH_POOL_FLAG_FULL);
WARN_ON(pi->id != t->target_oloc.pool);
return ((t->flags & CEPH_OSD_FLAG_READ) && pauserd) ||
@@ -2320,7 +2322,8 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked)
!(req->r_flags & (CEPH_OSD_FLAG_FULL_TRY |
CEPH_OSD_FLAG_FULL_FORCE)) &&
(ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
- pool_full(osdc, req->r_t.base_oloc.pool))) {
+ pool_flag(osdc, req->r_t.base_oloc.pool,
+ CEPH_POOL_FLAG_FULL))) {
dout("req %p full/pool_full\n", req);
if (ceph_test_opt(osdc->client, ABORT_ON_FULL)) {
err = -ENOSPC;
@@ -2539,7 +2542,7 @@ static int abort_on_full_fn(struct ceph_osd_request *req, void *arg)
if ((req->r_flags & CEPH_OSD_FLAG_WRITE) &&
(ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
- pool_full(osdc, req->r_t.base_oloc.pool))) {
+ pool_flag(osdc, req->r_t.base_oloc.pool, CEPH_POOL_FLAG_FULL))) {
if (!*victims) {
update_epoch_barrier(osdc, osdc->osdmap->epoch);
*victims = true;
@@ -3707,7 +3710,7 @@ static void set_pool_was_full(struct ceph_osd_client *osdc)
struct ceph_pg_pool_info *pi =
rb_entry(n, struct ceph_pg_pool_info, node);
- pi->was_full = __pool_full(pi);
+ pi->was_full = __pool_flag(pi, CEPH_POOL_FLAG_FULL);
}
}
@@ -3719,7 +3722,7 @@ static bool pool_cleared_full(struct ceph_osd_client *osdc, s64 pool_id)
if (!pi)
return false;
- return pi->was_full && !__pool_full(pi);
+ return pi->was_full && !__pool_flag(pi, CEPH_POOL_FLAG_FULL);
}
static enum calc_target_result
--
2.21.1
* Re: [v2] ceph: using POOL FULL flag instead of OSDMAP FULL flag
From: Ilya Dryomov @ 2020-03-09 20:43 UTC
To: Yanhu Cao
Cc: Jeff Layton, Sage Weil, David S. Miller, kuba, Ceph Development,
LKML, netdev
On Tue, Mar 3, 2020 at 10:33 AM Yanhu Cao <gmayyyha@gmail.com> wrote:
> [... v2 patch quoted in full ...]
Hi Yanhu,
Sorry for the late reply.
This adds some unnecessary churn and also exposes a helper that
must be called under osdc->lock without making that obvious. How
about the attached instead?
ceph_pg_pool_flags() takes osdmap instead of osdc, making it clear
that the caller is responsible for keeping the map stable.
Thanks,
Ilya
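A minimal sketch of the contrast between the two signatures (both
taken verbatim from the patches in this thread):

/*
 * v2 patch: nothing in the signature says that osdc->lock must be
 * held while the helper walks osdc->osdmap.
 */
bool pool_flag(struct ceph_osd_client *osdc, s64 pool_id, int flag);

/*
 * Suggested replacement: taking the osdmap itself signals that the
 * caller must keep the map stable, i.e. hold osdc->lock across the
 * call.
 */
u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id);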
[-- Attachment #2: full-nearfull.patch --]
[-- Type: text/x-patch, Size: 3026 bytes --]
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ba46ba740628..c8d84e90a371 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1663,10 +1663,13 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_osd_client *osdc = &fsc->client->osdc;
struct ceph_cap_flush *prealloc_cf;
ssize_t count, written = 0;
int err, want, got;
bool direct_lock = false;
+ u32 map_flags;
+ u64 pool_flags;
loff_t pos;
loff_t limit = max(i_size_read(inode), fsc->max_file_size);
@@ -1730,7 +1733,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
}
/* FIXME: not complete since it doesn't account for being at quota */
- if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_FULL)) {
+ down_read(&osdc->lock);
+ map_flags = osdc->osdmap->flags;
+ pool_flags = ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id);
+ up_read(&osdc->lock);
+ if ((map_flags & CEPH_OSDMAP_FULL) ||
+ (pool_flags & CEPH_POOL_FLAG_FULL)) {
err = -ENOSPC;
goto out;
}
@@ -1823,7 +1831,8 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
}
if (written >= 0) {
- if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_NEARFULL))
+ if ((map_flags & CEPH_OSDMAP_NEARFULL) ||
+ (pool_flags & CEPH_POOL_FLAG_NEARFULL))
iocb->ki_flags |= IOCB_DSYNC;
written = generic_write_sync(iocb, written);
}
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index e081b56f1c1d..651e1b967608 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -37,6 +37,7 @@ int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs);
#define CEPH_POOL_FLAG_HASHPSPOOL (1ULL << 0) /* hash pg seed and pool id
together */
#define CEPH_POOL_FLAG_FULL (1ULL << 1) /* pool is full */
+#define CEPH_POOL_FLAG_NEARFULL (1ULL << 11) /* pool is nearfull */
struct ceph_pg_pool_info {
struct rb_node node;
@@ -304,5 +305,6 @@ extern struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map,
extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
+u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id);
#endif
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 4e0de14f80bb..2a6e63a8edbe 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -710,6 +710,15 @@ int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name)
}
EXPORT_SYMBOL(ceph_pg_poolid_by_name);
+u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id)
+{
+ struct ceph_pg_pool_info *pi;
+
+ pi = __lookup_pg_pool(&map->pg_pools, id);
+ return pi ? pi->flags : 0;
+}
+EXPORT_SYMBOL(ceph_pg_pool_flags);
+
static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
{
rb_erase(&pi->node, root);
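A minimal usage sketch of the attached helper, mirroring the
fs/ceph/file.c hunk above (the wrapper function here is hypothetical;
the helper and the locking rule come from the attached patch):

static bool pool_is_full(struct ceph_osd_client *osdc, s64 pool_id)
{
        u64 pool_flags;

        /*
         * osdc->osdmap may be replaced once osdc->lock is dropped,
         * so copy the flags out under the lock and test the copy.
         */
        down_read(&osdc->lock);
        pool_flags = ceph_pg_pool_flags(osdc->osdmap, pool_id);
        up_read(&osdc->lock);

        return pool_flags & CEPH_POOL_FLAG_FULL;
}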
* Re: [v2] ceph: using POOL FULL flag instead of OSDMAP FULL flag
From: Yanhu Cao @ 2020-03-11 9:55 UTC
To: Ilya Dryomov
Cc: Jeff Layton, Sage Weil, David S. Miller, kuba, Ceph Development,
LKML, netdev
On Tue, Mar 10, 2020 at 4:43 AM Ilya Dryomov <idryomov@gmail.com> wrote:
>
> On Tue, Mar 3, 2020 at 10:33 AM Yanhu Cao <gmayyyha@gmail.com> wrote:
> > [... v2 patch quoted in full ...]
>
> Hi Yanhu,
>
> Sorry for the late reply.
>
> This adds some unnecessary churn and also exposes a helper that
> must be called under osdc->lock without making that obvious. How
> about the attached instead?
>
> ceph_pg_pool_flags() takes osdmap instead of osdc, making it clear
> that the caller is responsible for keeping the map stable.
>
> Thanks,
>
> Ilya
net/ceph/osdmap.c
--------------------------
bool ceph_pg_pool_flags(struct ceph_osdmap *map, s64 pool_id, int flag)
{
        struct ceph_pg_pool_info *pi;

        /* CEPH_OSDMAP_FULL|CEPH_OSDMAP_NEARFULL deprecated since mimic */
        if (flag & (CEPH_POOL_FLAG_FULL|CEPH_POOL_FLAG_NEARFULL))
                if (map->flags & (CEPH_OSDMAP_FULL|CEPH_OSDMAP_NEARFULL))
                        return true;

        pi = ceph_pg_pool_by_id(map, pool_id);
        if (!pi)
                return false;

        return pi->flags & flag;
}

fs/ceph/file.c
-----------------
ceph_write_iter() {
        ...
        down_read(&osdc->lock);
        if (ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id,
                               CEPH_POOL_FLAG_FULL|CEPH_POOL_FLAG_FULL_QUOTA)) {
                err = -ENOSPC;
                up_read(&osdc->lock);
                goto out;
        }
        up_read(&osdc->lock);
        ...
        down_read(&osdc->lock);
        if (ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id,
                               CEPH_POOL_FLAG_NEARFULL))
                iocb->ki_flags |= IOCB_DSYNC;
        up_read(&osdc->lock);
        ...
}
how about this?
Thanks.
* Re: [v2] ceph: using POOL FULL flag instead of OSDMAP FULL flag
From: Ilya Dryomov @ 2020-03-11 13:41 UTC
To: Yanhu Cao
Cc: Jeff Layton, Sage Weil, David S. Miller, kuba, Ceph Development,
LKML, netdev
On Wed, Mar 11, 2020 at 10:55 AM Yanhu Cao <gmayyyha@gmail.com> wrote:
>
> On Tue, Mar 10, 2020 at 4:43 AM Ilya Dryomov <idryomov@gmail.com> wrote:
> > [... v2 patch and earlier reply quoted in full ...]
>
> net/ceph/osdmap.c
> --------------------------
> bool ceph_pg_pool_flags(struct ceph_osdmap *map, s64 pool_id, int flag)
> {
>         struct ceph_pg_pool_info *pi;
>
>         /* CEPH_OSDMAP_FULL|CEPH_OSDMAP_NEARFULL deprecated since mimic */
>         if (flag & (CEPH_POOL_FLAG_FULL|CEPH_POOL_FLAG_NEARFULL))
>                 if (map->flags & (CEPH_OSDMAP_FULL|CEPH_OSDMAP_NEARFULL))
>                         return true;
>
>         pi = ceph_pg_pool_by_id(map, pool_id);
>         if (!pi)
>                 return false;
>
>         return pi->flags & flag;
> }
>
> fs/ceph/file.c
> -----------------
> ceph_write_iter() {
>         ...
>         down_read(&osdc->lock);
>         if (ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id,
>                                CEPH_POOL_FLAG_FULL|CEPH_POOL_FLAG_FULL_QUOTA)) {
>                 err = -ENOSPC;
>                 up_read(&osdc->lock);
>                 goto out;
>         }
>         up_read(&osdc->lock);
>         ...
>         down_read(&osdc->lock);
>         if (ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id,
>                                CEPH_POOL_FLAG_NEARFULL))
>                 iocb->ki_flags |= IOCB_DSYNC;
>         up_read(&osdc->lock);
>         ...
> }
>
> how about this?
Well, this takes osdc->lock and looks up ceph_pg_pool_info twice.
Given that these checks are inherently racy, I think doing it once
at the top makes more sense.
Also, I don't think this does what you intended it to do. Your
ceph_pg_pool_flags(..., CEPH_POOL_FLAG_FULL) returns true even if
the map only has CEPH_OSDMAP_NEARFULL, triggering early ENOSPC.
Checking CEPH_POOL_FLAG_FULL_QUOTA is not necessary, because it
is set together with CEPH_POOL_FLAG_FULL:
src/osd/osd_types.h:
1199     FLAG_FULL_QUOTA = 1<<10, // pool is currently running out of
                                  // quota, will set FLAG_FULL too
Thanks,
Ilya
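To spell out the misfire: with flag = CEPH_POOL_FLAG_FULL and a map
that is merely NEARFULL, the first test in the proposed helper already
succeeds (an annotated sketch of the code quoted above, not new code):

if (flag & (CEPH_POOL_FLAG_FULL|CEPH_POOL_FLAG_NEARFULL))          /* true: FULL is in the mask */
        if (map->flags & (CEPH_OSDMAP_FULL|CEPH_OSDMAP_NEARFULL))  /* true: map is NEARFULL */
                return true;  /* caller treats the pool as full -> early ENOSPC */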
* Re: [v2] ceph: using POOL FULL flag instead of OSDMAP FULL flag
From: Yanhu Cao @ 2020-03-12 8:40 UTC
To: Ilya Dryomov
Cc: Jeff Layton, Sage Weil, David S. Miller, kuba, Ceph Development,
LKML, netdev
On Wed, Mar 11, 2020 at 9:41 PM Ilya Dryomov <idryomov@gmail.com> wrote:
>
> On Wed, Mar 11, 2020 at 10:55 AM Yanhu Cao <gmayyyha@gmail.com> wrote:
> > [... earlier messages quoted in full ...]
>
> Well, this takes osdc->lock and looks up ceph_pg_pool_info twice.
> Given that these checks are inherently racy, I think doing it once
> at the top makes more sense.
It can be modified as follows:

ceph_write_iter() {
        ...
        down_read(&osdc->lock);
        pi = ceph_pg_pool_by_id(osdc->osdmap, ci->i_layout.pool_id);
        if (!pi) {
                err = -ENOENT;
                up_read(&osdc->lock);
                goto out;
        }
        up_read(&osdc->lock);
        ...
}
>
> Also, I don't think this does what you intended it to do. Your
> ceph_pg_pool_flags(..., CEPH_POOL_FLAG_FULL) returns true even if
> the map only has CEPH_OSDMAP_NEARFULL, triggering early ENOSPC.
>
Ah... my mistake. I'll check the OSDMAP and POOL flags separately:

if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
    ceph_pg_pool_flags(pi, CEPH_POOL_FLAG_FULL)) {
        err = -ENOSPC;
        goto out;
}

include/linux/ceph/osdmap.h
--------------------------------------
static inline bool ceph_pg_pool_flags(struct ceph_pg_pool_info *pi, int flag)
{
        return pi->flags & flag;
}
Thanks.
* Re: [v2] ceph: using POOL FULL flag instead of OSDMAP FULL flag
From: Ilya Dryomov @ 2020-03-12 14:33 UTC
To: Yanhu Cao
Cc: Jeff Layton, Sage Weil, David S. Miller, kuba, Ceph Development,
LKML, netdev
On Thu, Mar 12, 2020 at 9:40 AM Yanhu Cao <gmayyyha@gmail.com> wrote:
>
> On Wed, Mar 11, 2020 at 9:41 PM Ilya Dryomov <idryomov@gmail.com> wrote:
> >
> > On Wed, Mar 11, 2020 at 10:55 AM Yanhu Cao <gmayyyha@gmail.com> wrote:
> > > [... earlier messages quoted in full ...]
> >
> > Well, this takes osdc->lock and looks up ceph_pg_pool_info twice.
> > Given that these checks are inherently racy, I think doing it once
> > at the top makes more sense.
>
> It can be modified as follows:
>
> ceph_write_iter() {
>         ...
>         down_read(&osdc->lock);
>         pi = ceph_pg_pool_by_id(osdc->osdmap, ci->i_layout.pool_id);
>         if (!pi) {
>                 err = -ENOENT;
>                 up_read(&osdc->lock);
>                 goto out;
>         }
>         up_read(&osdc->lock);
>         ...
> }
No, this won't work because as soon as you release osdc->lock, pi may
get invalidated.
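A sketch of the lifetime hazard being described (the sequence is
illustrative; the invariant, from this thread, is that osdc->osdmap
and the pool rbtree that pi points into may be replaced and freed
once osdc->lock is dropped):

down_read(&osdc->lock);
pi = ceph_pg_pool_by_id(osdc->osdmap, ci->i_layout.pool_id);
up_read(&osdc->lock);
/*
 * A new osdmap can be installed at this point and the old one
 * freed, taking the rbtree node that pi points to with it ...
 */
if (pi->flags & CEPH_POOL_FLAG_FULL)    /* ... so this may be a use-after-free */
        ...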
>
> >
> > Also, I don't think this does what you intended it to do. Your
> > ceph_pg_pool_flags(..., CEPH_POOL_FLAG_FULL) returns true even if
> > the map only has CEPH_OSDMAP_NEARFULL, triggering early ENOSPC.
> >
>
> Ah... my mistake. I'll check the OSDMAP and POOL flags separately:
>
> if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
>     ceph_pg_pool_flags(pi, CEPH_POOL_FLAG_FULL)) {
... and you may reference invalid memory here.
>         err = -ENOSPC;
>         goto out;
> }
>
> include/linux/ceph/osdmap.h
> --------------------------------------
> static inline bool ceph_pg_pool_flags(struct ceph_pg_pool_info *pi, int flag)
> {
>         return pi->flags & flag;
> }
I'll go ahead with my version.
Thanks,
Ilya
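For reference, the pattern Ilya's version (the attachment to his first
reply above) settles on: take osdc->lock once, copy both the map flags
and the pool flags out, then do every check on the copies (a sketch
assembled from the hunks above):

u32 map_flags;
u64 pool_flags;

down_read(&osdc->lock);
map_flags = osdc->osdmap->flags;
pool_flags = ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id);
up_read(&osdc->lock);

if ((map_flags & CEPH_OSDMAP_FULL) ||
    (pool_flags & CEPH_POOL_FLAG_FULL)) {
        err = -ENOSPC;                  /* fail the write early */
        goto out;
}
...
if ((map_flags & CEPH_OSDMAP_NEARFULL) ||
    (pool_flags & CEPH_POOL_FLAG_NEARFULL))
        iocb->ki_flags |= IOCB_DSYNC;   /* force write-through when nearly full */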