LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Michal Hocko <mhocko@suse.cz>
To: Johannes Weiner <hannes@cmpxchg.org>
Cc: David Rientjes <rientjes@google.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Qiang Huang <h.huangqiang@huawei.com>,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: Re: [patch] mm, oom: remove gfp helper function
Date: Fri, 5 Dec 2014 15:05:39 +0100	[thread overview]
Message-ID: <20141205140539.GD2321@dhcp22.suse.cz> (raw)
In-Reply-To: <20141204201905.GA17790@phnom.home.cmpxchg.org>

On Thu 04-12-14 15:19:05, Johannes Weiner wrote:
[...]
> How about the following?  It changes the code flow to clarify what's
> actually going on there and gets rid of oom_gfp_allowed() altogether,
> instead of awkwardly trying to explain something that has no meaning.

Yes this makes a lot of sense.

> Btw, it looks like there is a bug with oom_killer_disabled, because it
> will return NULL for __GFP_NOFAIL.

Right! __GFP_NOFAIL allocation after oom is disabled cannot be
guaranteed.

> ---
> From: Johannes Weiner <hannes@cmpxchg.org>
> Subject: [patch] mm: page_alloc: embed OOM killing naturally into allocation
>  slowpath
> 
> The OOM killing invocation does a lot of duplicative checks against
> the task's allocation context.  Rework it to take advantage of the
> existing checks in the allocator slowpath.

Nice! Just document the __GFP_NOFAIL fix here as well, please.

> Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>

Acked-by: Michal Hocko <mhocko@suse.cz>

I will rebase my oom vs pm-freezer patch
(http://marc.info/?l=linux-mm&m=141634503316543&w=2) which touches this
area on top of your patch.

Thanks!

> ---
>  include/linux/oom.h |  5 ----
>  mm/page_alloc.c     | 80 +++++++++++++++++++++++------------------------------
>  2 files changed, 35 insertions(+), 50 deletions(-)
> 
> diff --git a/include/linux/oom.h b/include/linux/oom.h
> index e8d6e1058723..4971874f54db 100644
> --- a/include/linux/oom.h
> +++ b/include/linux/oom.h
> @@ -85,11 +85,6 @@ static inline void oom_killer_enable(void)
>  	oom_killer_disabled = false;
>  }
>  
> -static inline bool oom_gfp_allowed(gfp_t gfp_mask)
> -{
> -	return (gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY);
> -}
> -
>  extern struct task_struct *find_lock_task_mm(struct task_struct *p);
>  
>  /* sysctls */
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 616a2c956b4b..2df99ca56e28 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2232,12 +2232,21 @@ static inline struct page *
>  __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
>  	struct zonelist *zonelist, enum zone_type high_zoneidx,
>  	nodemask_t *nodemask, struct zone *preferred_zone,
> -	int classzone_idx, int migratetype)
> +	int classzone_idx, int migratetype, unsigned long *did_some_progress)
>  {
>  	struct page *page;
>  
> -	/* Acquire the per-zone oom lock for each zone */
> +	*did_some_progress = 0;
> +
> +	if (oom_killer_disabled)
> +		return NULL;
> +
> +	/*
> +	 * Acquire the per-zone oom lock for each zone.  If that
> +	 * fails, somebody else is making progress for us.
> +	 */
>  	if (!oom_zonelist_trylock(zonelist, gfp_mask)) {
> +		*did_some_progress = 1;
>  		schedule_timeout_uninterruptible(1);
>  		return NULL;
>  	}
> @@ -2263,12 +2272,18 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
>  		goto out;
>  
>  	if (!(gfp_mask & __GFP_NOFAIL)) {
> +		/* Coredumps can quickly deplete all memory reserves */
> +		if (current->flags & PF_DUMPCORE)
> +			goto out;
>  		/* The OOM killer will not help higher order allocs */
>  		if (order > PAGE_ALLOC_COSTLY_ORDER)
>  			goto out;
>  		/* The OOM killer does not needlessly kill tasks for lowmem */
>  		if (high_zoneidx < ZONE_NORMAL)
>  			goto out;
> +		/* The OOM killer does not compensate for light reclaim */
> +		if (!(gfp_mask & __GFP_FS))
> +			goto out;
>  		/*
>  		 * GFP_THISNODE contains __GFP_NORETRY and we never hit this.
>  		 * Sanity check for bare calls of __GFP_THISNODE, not real OOM.
> @@ -2281,7 +2296,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
>  	}
>  	/* Exhausted what can be done so it's blamo time */
>  	out_of_memory(zonelist, gfp_mask, order, nodemask, false);
> -
> +	*did_some_progress = 1;
>  out:
>  	oom_zonelist_unlock(zonelist, gfp_mask);
>  	return page;
> @@ -2571,7 +2586,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>  	    (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
>  		goto nopage;
>  
> -restart:
>  	if (!(gfp_mask & __GFP_NO_KSWAPD))
>  		wake_all_kswapds(order, zonelist, high_zoneidx,
>  				preferred_zone, nodemask);
> @@ -2701,51 +2715,27 @@ rebalance:
>  	if (page)
>  		goto got_pg;
>  
> -	/*
> -	 * If we failed to make any progress reclaiming, then we are
> -	 * running out of options and have to consider going OOM
> -	 */
> -	if (!did_some_progress) {
> -		if (oom_gfp_allowed(gfp_mask)) {
> -			if (oom_killer_disabled)
> -				goto nopage;
> -			/* Coredumps can quickly deplete all memory reserves */
> -			if ((current->flags & PF_DUMPCORE) &&
> -			    !(gfp_mask & __GFP_NOFAIL))
> -				goto nopage;
> -			page = __alloc_pages_may_oom(gfp_mask, order,
> -					zonelist, high_zoneidx,
> -					nodemask, preferred_zone,
> -					classzone_idx, migratetype);
> -			if (page)
> -				goto got_pg;
> -
> -			if (!(gfp_mask & __GFP_NOFAIL)) {
> -				/*
> -				 * The oom killer is not called for high-order
> -				 * allocations that may fail, so if no progress
> -				 * is being made, there are no other options and
> -				 * retrying is unlikely to help.
> -				 */
> -				if (order > PAGE_ALLOC_COSTLY_ORDER)
> -					goto nopage;
> -				/*
> -				 * The oom killer is not called for lowmem
> -				 * allocations to prevent needlessly killing
> -				 * innocent tasks.
> -				 */
> -				if (high_zoneidx < ZONE_NORMAL)
> -					goto nopage;
> -			}
> -
> -			goto restart;
> -		}
> -	}
> -
>  	/* Check if we should retry the allocation */
>  	pages_reclaimed += did_some_progress;
>  	if (should_alloc_retry(gfp_mask, order, did_some_progress,
>  						pages_reclaimed)) {
> +		/*
> +		 * If we fail to make progress by freeing individual
> +		 * pages, but the allocation wants us to keep going,
> +		 * start OOM killing tasks.
> +		 */
> +		if (!did_some_progress) {
> +			page = __alloc_pages_may_oom(gfp_mask, order, zonelist,
> +						high_zoneidx, nodemask,
> +						preferred_zone, classzone_idx,
> +						migratetype,&did_some_progress);
> +			if (page)
> +				goto got_pg;
> +			if (!did_some_progress) {
> +				BUG_ON(gfp_mask & __GFP_NOFAIL);
> +				goto nopage;
> +			}
> +		}
>  		/* Wait for some write requests to complete then retry */
>  		wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
>  		goto rebalance;
> -- 
> 2.1.3
> 

-- 
Michal Hocko
SUSE Labs

  reply	other threads:[~2014-12-05 14:05 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-11-26 22:17 David Rientjes
2014-11-27 10:25 ` Michal Hocko
2014-12-01 23:30   ` Johannes Weiner
2014-12-03 15:52     ` Michal Hocko
2014-12-03 18:15       ` Johannes Weiner
2014-12-04 15:17         ` Michal Hocko
2014-12-04 20:19           ` Johannes Weiner
2014-12-05 14:05             ` Michal Hocko [this message]
2014-12-03 23:10       ` Andrew Morton
2014-12-01 23:23 ` Johannes Weiner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20141205140539.GD2321@dhcp22.suse.cz \
    --to=mhocko@suse.cz \
    --cc=akpm@linux-foundation.org \
    --cc=h.huangqiang@huawei.com \
    --cc=hannes@cmpxchg.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=rientjes@google.com \
    --subject='Re: [patch] mm, oom: remove gfp helper function' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).