LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Ilya Dryomov <idryomov@gmail.com>
To: Yanhu Cao <gmayyyha@gmail.com>
Cc: Jeff Layton <jlayton@kernel.org>, Sage Weil <sage@redhat.com>,
	"David S. Miller" <davem@davemloft.net>,
	kuba@kernel.org, Ceph Development <ceph-devel@vger.kernel.org>,
	LKML <linux-kernel@vger.kernel.org>,
	netdev <netdev@vger.kernel.org>
Subject: Re: [v2] ceph: using POOL FULL flag instead of OSDMAP FULL flag
Date: Mon, 9 Mar 2020 21:43:36 +0100	[thread overview]
Message-ID: <CAOi1vP8jZ2tX_dg90uZY5G8cKX2Lzyu2vrGT_Ew0gVsnK4DDMA@mail.gmail.com> (raw)
In-Reply-To: <20200303093327.8720-1-gmayyyha@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 7255 bytes --]

On Tue, Mar 3, 2020 at 10:33 AM Yanhu Cao <gmayyyha@gmail.com> wrote:
>
> CEPH_OSDMAP_FULL/NEARFULL has been deprecated since mimic, so it
> does not work well in new versions, added POOL flags to handle it.
>
> Signed-off-by: Yanhu Cao <gmayyyha@gmail.com>
> ---
>  fs/ceph/file.c                  |  9 +++++++--
>  include/linux/ceph/osd_client.h |  2 ++
>  include/linux/ceph/osdmap.h     |  3 ++-
>  net/ceph/osd_client.c           | 23 +++++++++++++----------
>  4 files changed, 24 insertions(+), 13 deletions(-)
>
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 7e0190b1f821..84ec44f9d77a 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -1482,7 +1482,9 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>         }
>
>         /* FIXME: not complete since it doesn't account for being at quota */
> -       if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_FULL)) {
> +       if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_FULL) ||
> +           pool_flag(&fsc->client->osdc, ci->i_layout.pool_id,
> +                                               CEPH_POOL_FLAG_FULL)) {
>                 err = -ENOSPC;
>                 goto out;
>         }
> @@ -1575,7 +1577,10 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>         }
>
>         if (written >= 0) {
> -               if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_NEARFULL))
> +               if (ceph_osdmap_flag(&fsc->client->osdc,
> +                                       CEPH_OSDMAP_NEARFULL) ||
> +                   pool_flag(&fsc->client->osdc, ci->i_layout.pool_id,
> +                                       CEPH_POOL_FLAG_NEARFULL))
>                         iocb->ki_flags |= IOCB_DSYNC;
>                 written = generic_write_sync(iocb, written);
>         }
> diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
> index 5a62dbd3f4c2..be9007b93862 100644
> --- a/include/linux/ceph/osd_client.h
> +++ b/include/linux/ceph/osd_client.h
> @@ -375,6 +375,8 @@ static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag)
>         return osdc->osdmap->flags & flag;
>  }
>
> +bool pool_flag(struct ceph_osd_client *osdc, s64 pool_id, int flag);
> +
>  extern int ceph_osdc_setup(void);
>  extern void ceph_osdc_cleanup(void);
>
> diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
> index e081b56f1c1d..88faacc11f55 100644
> --- a/include/linux/ceph/osdmap.h
> +++ b/include/linux/ceph/osdmap.h
> @@ -36,7 +36,8 @@ int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs);
>
>  #define CEPH_POOL_FLAG_HASHPSPOOL      (1ULL << 0) /* hash pg seed and pool id
>                                                        together */
> -#define CEPH_POOL_FLAG_FULL            (1ULL << 1) /* pool is full */
> +#define CEPH_POOL_FLAG_FULL            (1ULL << 1)  /* pool is full */
> +#define CEPH_POOL_FLAG_NEARFULL        (1ULL << 11) /* pool is nearfull */
>
>  struct ceph_pg_pool_info {
>         struct rb_node node;
> diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
> index b68b376d8c2f..9ad2b96c3e78 100644
> --- a/net/ceph/osd_client.c
> +++ b/net/ceph/osd_client.c
> @@ -1447,9 +1447,9 @@ static void unlink_request(struct ceph_osd *osd, struct ceph_osd_request *req)
>                 atomic_dec(&osd->o_osdc->num_homeless);
>  }
>
> -static bool __pool_full(struct ceph_pg_pool_info *pi)
> +static bool __pool_flag(struct ceph_pg_pool_info *pi, int flag)
>  {
> -       return pi->flags & CEPH_POOL_FLAG_FULL;
> +       return pi->flags & flag;
>  }
>
>  static bool have_pool_full(struct ceph_osd_client *osdc)
> @@ -1460,14 +1460,14 @@ static bool have_pool_full(struct ceph_osd_client *osdc)
>                 struct ceph_pg_pool_info *pi =
>                     rb_entry(n, struct ceph_pg_pool_info, node);
>
> -               if (__pool_full(pi))
> +               if (__pool_flag(pi, CEPH_POOL_FLAG_FULL))
>                         return true;
>         }
>
>         return false;
>  }
>
> -static bool pool_full(struct ceph_osd_client *osdc, s64 pool_id)
> +bool pool_flag(struct ceph_osd_client *osdc, s64 pool_id, int flag)
>  {
>         struct ceph_pg_pool_info *pi;
>
> @@ -1475,8 +1475,10 @@ static bool pool_full(struct ceph_osd_client *osdc, s64 pool_id)
>         if (!pi)
>                 return false;
>
> -       return __pool_full(pi);
> +       return __pool_flag(pi, flag);
>  }
> +EXPORT_SYMBOL(pool_flag);
> +
>
>  /*
>   * Returns whether a request should be blocked from being sent
> @@ -1489,7 +1491,7 @@ static bool target_should_be_paused(struct ceph_osd_client *osdc,
>         bool pauserd = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD);
>         bool pausewr = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR) ||
>                        ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
> -                      __pool_full(pi);
> +                      __pool_flag(pi, CEPH_POOL_FLAG_FULL);
>
>         WARN_ON(pi->id != t->target_oloc.pool);
>         return ((t->flags & CEPH_OSD_FLAG_READ) && pauserd) ||
> @@ -2320,7 +2322,8 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked)
>                    !(req->r_flags & (CEPH_OSD_FLAG_FULL_TRY |
>                                      CEPH_OSD_FLAG_FULL_FORCE)) &&
>                    (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
> -                   pool_full(osdc, req->r_t.base_oloc.pool))) {
> +                  pool_flag(osdc, req->r_t.base_oloc.pool,
> +                            CEPH_POOL_FLAG_FULL))) {
>                 dout("req %p full/pool_full\n", req);
>                 if (ceph_test_opt(osdc->client, ABORT_ON_FULL)) {
>                         err = -ENOSPC;
> @@ -2539,7 +2542,7 @@ static int abort_on_full_fn(struct ceph_osd_request *req, void *arg)
>
>         if ((req->r_flags & CEPH_OSD_FLAG_WRITE) &&
>             (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
> -            pool_full(osdc, req->r_t.base_oloc.pool))) {
> +            pool_flag(osdc, req->r_t.base_oloc.pool, CEPH_POOL_FLAG_FULL))) {
>                 if (!*victims) {
>                         update_epoch_barrier(osdc, osdc->osdmap->epoch);
>                         *victims = true;
> @@ -3707,7 +3710,7 @@ static void set_pool_was_full(struct ceph_osd_client *osdc)
>                 struct ceph_pg_pool_info *pi =
>                     rb_entry(n, struct ceph_pg_pool_info, node);
>
> -               pi->was_full = __pool_full(pi);
> +               pi->was_full = __pool_flag(pi, CEPH_POOL_FLAG_FULL);
>         }
>  }
>
> @@ -3719,7 +3722,7 @@ static bool pool_cleared_full(struct ceph_osd_client *osdc, s64 pool_id)
>         if (!pi)
>                 return false;
>
> -       return pi->was_full && !__pool_full(pi);
> +       return pi->was_full && !__pool_flag(pi, CEPH_POOL_FLAG_FULL);
>  }
>
>  static enum calc_target_result

Hi Yanhu,

Sorry for the late reply.

This adds some unnecessary churn and also exposes a helper that
must be called under osdc->lock without making that obvious.  How
about the attached instead?

ceph_pg_pool_flags() takes osdmap instead of osdc, making it clear
that the caller is responsible for keeping the map stable.

Thanks,

                Ilya

[-- Attachment #2: full-nearfull.patch --]
[-- Type: text/x-patch, Size: 3026 bytes --]

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ba46ba740628..c8d84e90a371 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1663,10 +1663,13 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+	struct ceph_osd_client *osdc = &fsc->client->osdc;
 	struct ceph_cap_flush *prealloc_cf;
 	ssize_t count, written = 0;
 	int err, want, got;
 	bool direct_lock = false;
+	u32 map_flags;
+	u64 pool_flags;
 	loff_t pos;
 	loff_t limit = max(i_size_read(inode), fsc->max_file_size);
 
@@ -1730,7 +1733,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	}
 
 	/* FIXME: not complete since it doesn't account for being at quota */
-	if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_FULL)) {
+	down_read(&osdc->lock);
+	map_flags = osdc->osdmap->flags;
+	pool_flags = ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id);
+	up_read(&osdc->lock);
+	if ((map_flags & CEPH_OSDMAP_FULL) ||
+	    (pool_flags & CEPH_POOL_FLAG_FULL)) {
 		err = -ENOSPC;
 		goto out;
 	}
@@ -1823,7 +1831,8 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	}
 
 	if (written >= 0) {
-		if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_NEARFULL))
+		if ((map_flags & CEPH_OSDMAP_NEARFULL) ||
+		    (pool_flags & CEPH_POOL_FLAG_NEARFULL))
 			iocb->ki_flags |= IOCB_DSYNC;
 		written = generic_write_sync(iocb, written);
 	}
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index e081b56f1c1d..651e1b967608 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -37,6 +37,7 @@ int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs);
 #define CEPH_POOL_FLAG_HASHPSPOOL	(1ULL << 0) /* hash pg seed and pool id
 						       together */
 #define CEPH_POOL_FLAG_FULL		(1ULL << 1) /* pool is full */
+#define CEPH_POOL_FLAG_NEARFULL		(1ULL << 11) /* pool is nearfull */
 
 struct ceph_pg_pool_info {
 	struct rb_node node;
@@ -304,5 +305,6 @@ extern struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map,
 
 extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
 extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
+u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id);
 
 #endif
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 4e0de14f80bb..2a6e63a8edbe 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -710,6 +710,15 @@ int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name)
 }
 EXPORT_SYMBOL(ceph_pg_poolid_by_name);
 
+u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id)
+{
+	struct ceph_pg_pool_info *pi;
+
+	pi = __lookup_pg_pool(&map->pg_pools, id);
+	return pi ? pi->flags : 0;
+}
+EXPORT_SYMBOL(ceph_pg_pool_flags);
+
 static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
 {
 	rb_erase(&pi->node, root);

  reply	other threads:[~2020-03-09 20:43 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-03  9:33 Yanhu Cao
2020-03-09 20:43 ` Ilya Dryomov [this message]
2020-03-11  9:55   ` Yanhu Cao
2020-03-11 13:41     ` Ilya Dryomov
2020-03-12  8:40       ` Yanhu Cao
2020-03-12 14:33         ` Ilya Dryomov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAOi1vP8jZ2tX_dg90uZY5G8cKX2Lzyu2vrGT_Ew0gVsnK4DDMA@mail.gmail.com \
    --to=idryomov@gmail.com \
    --cc=ceph-devel@vger.kernel.org \
    --cc=davem@davemloft.net \
    --cc=gmayyyha@gmail.com \
    --cc=jlayton@kernel.org \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=sage@redhat.com \
    --subject='Re: [v2] ceph: using POOL FULL flag instead of OSDMAP FULL flag' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).