LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Greg KH <gregkh@suse.de>
To: linux-kernel@vger.kernel.org, stable@kernel.org
Cc: Justin Forbes <jmforbes@linuxtx.org>,
	Zwane Mwaikambo <zwane@arm.linux.org.uk>,
	"Theodore Ts'o" <tytso@mit.edu>,
	Randy Dunlap <rdunlap@xenotime.net>,
	Dave Jones <davej@redhat.com>,
	Chuck Wolber <chuckw@quantumlinux.com>,
	Chris Wedgwood <reviews@ml.cw.f00f.org>,
	Michael Krufky <mkrufky@linuxtv.org>,
	Chuck Ebbert <cebbert@redhat.com>,
	Domenico Andreoli <cavokz@gmail.com>, Willy Tarreau <w@1wt.eu>,
	Rodrigo Rubira Branco <rbranco@la.checkpoint.com>,
	Jake Edge <jake@lwn.net>, Eugene Teo <eteo@redhat.com>,
	torvalds@linux-foundation.org, akpm@linux-foundation.org,
	alan@lxorguk.ukuu.org.uk, Miklos Szeredi <mszeredi@suse.cz>
Subject: [patch 20/49] net: unix: fix inflight counting bug in garbage collector
Date: Tue, 11 Nov 2008 16:23:35 -0800	[thread overview]
Message-ID: <20081112002335.GU10989@kroah.com> (raw)
In-Reply-To: <20081112002215.GA10989@kroah.com>

[-- Attachment #1: net-unix-fix-inflight-counting-bug-in-garbage-collector.patch --]
[-- Type: text/plain, Size: 6500 bytes --]

2.6.27-stable review patch.  If anyone has any objections, please let us know.

------------------

From: Miklos Szeredi <mszeredi@suse.cz>

commit 6209344f5a3795d34b7f2c0061f49802283b6bdd upstream

Previously I assumed that the receive queues of candidates don't
change during the GC.  This is only half true, nothing can be received
from the queues (see comment in unix_gc()), but buffers could be added
through the other half of the socket pair, which may still have file
descriptors referring to it.

This can result in inc_inflight_move_tail() erronously increasing the
"inflight" counter for a unix socket for which dec_inflight() wasn't
previously called.  This in turn can trigger the "BUG_ON(total_refs <
inflight_refs)" in a later garbage collection run.

Fix this by only manipulating the "inflight" counter for sockets which
are candidates themselves.  Duplicating the file references in
unix_attach_fds() is also needed to prevent a socket becoming a
candidate for GC while the skb that contains it is not yet queued.

Reported-by: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

---
 include/net/af_unix.h |    1 +
 net/unix/af_unix.c    |   31 ++++++++++++++++++++++++-------
 net/unix/garbage.c    |   49 +++++++++++++++++++++++++++++++++++++------------
 3 files changed, 62 insertions(+), 19 deletions(-)

--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -54,6 +54,7 @@ struct unix_sock {
         atomic_long_t           inflight;
         spinlock_t		lock;
 	unsigned int		gc_candidate : 1;
+	unsigned int		gc_maybe_cycle : 1;
         wait_queue_head_t       peer_wait;
 };
 #define unix_sk(__sk) ((struct unix_sock *)__sk)
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1300,14 +1300,23 @@ static void unix_destruct_fds(struct sk_
 	sock_wfree(skb);
 }
 
-static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 {
 	int i;
+
+	/*
+	 * Need to duplicate file references for the sake of garbage
+	 * collection.  Otherwise a socket in the fps might become a
+	 * candidate for GC while the skb is not yet queued.
+	 */
+	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
+	if (!UNIXCB(skb).fp)
+		return -ENOMEM;
+
 	for (i=scm->fp->count-1; i>=0; i--)
 		unix_inflight(scm->fp->fp[i]);
-	UNIXCB(skb).fp = scm->fp;
 	skb->destructor = unix_destruct_fds;
-	scm->fp = NULL;
+	return 0;
 }
 
 /*
@@ -1366,8 +1375,11 @@ static int unix_dgram_sendmsg(struct kio
 		goto out;
 
 	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
-	if (siocb->scm->fp)
-		unix_attach_fds(siocb->scm, skb);
+	if (siocb->scm->fp) {
+		err = unix_attach_fds(siocb->scm, skb);
+		if (err)
+			goto out_free;
+	}
 	unix_get_secdata(siocb->scm, skb);
 
 	skb_reset_transport_header(skb);
@@ -1536,8 +1548,13 @@ static int unix_stream_sendmsg(struct ki
 		size = min_t(int, size, skb_tailroom(skb));
 
 		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
-		if (siocb->scm->fp)
-			unix_attach_fds(siocb->scm, skb);
+		if (siocb->scm->fp) {
+			err = unix_attach_fds(siocb->scm, skb);
+			if (err) {
+				kfree_skb(skb);
+				goto out_err;
+			}
+		}
 
 		if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
 			kfree_skb(skb);
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -186,8 +186,17 @@ static void scan_inflight(struct sock *x
 				 */
 				struct sock *sk = unix_get_socket(*fp++);
 				if (sk) {
-					hit = true;
-					func(unix_sk(sk));
+					struct unix_sock *u = unix_sk(sk);
+
+					/*
+					 * Ignore non-candidates, they could
+					 * have been added to the queues after
+					 * starting the garbage collection
+					 */
+					if (u->gc_candidate) {
+						hit = true;
+						func(u);
+					}
 				}
 			}
 			if (hit && hitlist != NULL) {
@@ -249,11 +258,11 @@ static void inc_inflight_move_tail(struc
 {
 	atomic_long_inc(&u->inflight);
 	/*
-	 * If this is still a candidate, move it to the end of the
-	 * list, so that it's checked even if it was already passed
-	 * over
+	 * If this still might be part of a cycle, move it to the end
+	 * of the list, so that it's checked even if it was already
+	 * passed over
 	 */
-	if (u->gc_candidate)
+	if (u->gc_maybe_cycle)
 		list_move_tail(&u->link, &gc_candidates);
 }
 
@@ -267,6 +276,7 @@ void unix_gc(void)
 	struct unix_sock *next;
 	struct sk_buff_head hitlist;
 	struct list_head cursor;
+	LIST_HEAD(not_cycle_list);
 
 	spin_lock(&unix_gc_lock);
 
@@ -282,10 +292,14 @@ void unix_gc(void)
 	 *
 	 * Holding unix_gc_lock will protect these candidates from
 	 * being detached, and hence from gaining an external
-	 * reference.  This also means, that since there are no
-	 * possible receivers, the receive queues of these sockets are
-	 * static during the GC, even though the dequeue is done
-	 * before the detach without atomicity guarantees.
+	 * reference.  Since there are no possible receivers, all
+	 * buffers currently on the candidates' queues stay there
+	 * during the garbage collection.
+	 *
+	 * We also know that no new candidate can be added onto the
+	 * receive queues.  Other, non candidate sockets _can_ be
+	 * added to queue, so we must make sure only to touch
+	 * candidates.
 	 */
 	list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
 		long total_refs;
@@ -299,6 +313,7 @@ void unix_gc(void)
 		if (total_refs == inflight_refs) {
 			list_move_tail(&u->link, &gc_candidates);
 			u->gc_candidate = 1;
+			u->gc_maybe_cycle = 1;
 		}
 	}
 
@@ -325,14 +340,24 @@ void unix_gc(void)
 		list_move(&cursor, &u->link);
 
 		if (atomic_long_read(&u->inflight) > 0) {
-			list_move_tail(&u->link, &gc_inflight_list);
-			u->gc_candidate = 0;
+			list_move_tail(&u->link, &not_cycle_list);
+			u->gc_maybe_cycle = 0;
 			scan_children(&u->sk, inc_inflight_move_tail, NULL);
 		}
 	}
 	list_del(&cursor);
 
 	/*
+	 * not_cycle_list contains those sockets which do not make up a
+	 * cycle.  Restore these to the inflight list.
+	 */
+	while (!list_empty(&not_cycle_list)) {
+		u = list_entry(not_cycle_list.next, struct unix_sock, link);
+		u->gc_candidate = 0;
+		list_move_tail(&u->link, &gc_inflight_list);
+	}
+
+	/*
 	 * Now gc_candidates contains only garbage.  Restore original
 	 * inflight counters for these as well, and remove the skbuffs
 	 * which are creating the cycle(s).

-- 

  parent reply	other threads:[~2008-11-12  0:33 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20081112001401.926965113@mini.kroah.org>
2008-11-12  0:22 ` [patch 00/49] 2.6.27.5 stable review Greg KH
2008-11-12  0:22   ` [patch 01/49] ext3: wait on all pending commits in ext3_sync_fs Greg KH
2008-11-12  0:22   ` [patch 02/49] x86: add DMI quirk for AMI BIOS which corrupts address 0xc000 during resume Greg KH
2008-11-12  0:22   ` [patch 03/49] x86: reserve low 64K on AMI and Phoenix BIOS boxen Greg KH
2008-11-12  0:22   ` [patch 04/49] x86: add X86_RESERVE_LOW_64K Greg KH
2008-11-12  0:23   ` [patch 05/49] x86: fix CONFIG_X86_RESERVE_LOW_64K=y Greg KH
2008-11-12  0:23   ` [patch 06/49] x86: fix macro with bad_bios_dmi_table Greg KH
2008-11-12  0:23   ` [patch 07/49] cgroups: fix invalid cgrp->dentry before cgroup has been completely removed Greg KH
2008-11-12  0:23   ` [patch 08/49] hugetlb: pull gigantic page initialisation out of the default path Greg KH
2008-11-12  0:23   ` [patch 09/49] hugetlbfs: handle pages higher order than MAX_ORDER Greg KH
2008-11-12  0:23   ` [patch 10/49] cciss: fix regression firmware not displayed in procfs Greg KH
2008-11-12  0:23   ` [patch 11/49] cciss: fix sysfs broken symlink regression Greg KH
2008-11-12  0:23   ` [patch 12/49] cciss: new hardware support Greg KH
2008-11-12  0:23   ` [patch 13/49] md: linear: Fix a division by zero bug for very small arrays Greg KH
2008-11-12  0:23   ` [patch 14/49] md: fix bug in raid10 recovery Greg KH
2008-11-12  0:23   ` [patch 15/49] JFFS2: fix race condition in jffs2_lzo_compress() Greg KH
2008-11-12  0:23   ` [patch 16/49] JFFS2: Fix lack of locking in thread_should_wake() Greg KH
2008-11-12  0:23   ` [patch 17/49] ARM: xsc3: fix xsc3_l2_inv_range Greg KH
2008-11-12  0:23   ` [patch 18/49] MTD: Fix cfi_send_gen_cmd handling of x16 devices in x8 mode (v4) Greg KH
2008-11-12  0:23   ` [patch 19/49] x86: dont use tsc_khz to calculate lpj if notsc is passed Greg KH
2008-11-12  0:23   ` Greg KH [this message]
2008-11-12  0:23   ` [patch 21/49] r8169: get ethtool settings through the generic mii helper Greg KH
2008-11-12  0:23   ` [patch 22/49] r8169: fix RxMissed register access Greg KH
2008-11-12  0:23   ` [patch 23/49] r8169: wake up the PHY of the 8168 Greg KH
2008-11-12  0:23   ` [patch 24/49] I/OAT: fix channel resources free for not allocated channels Greg KH
2008-11-12  0:23   ` [patch 25/49] I/OAT: fix dma_pin_iovec_pages() error handling Greg KH
2008-11-12  0:23   ` [patch 26/49] I/OAT: fix async_tx.callback checking Greg KH
2008-11-12  0:23   ` [patch 27/49] dca: fixup initialization dependency Greg KH
2008-11-12  0:23   ` [patch 28/49] iwlwifi: allow consecutive scans in unassociated state Greg KH
2008-11-12  0:23   ` [patch 29/49] iwlwifi: allow association on radar channel in power save Greg KH
2008-11-12  0:23   ` [patch 30/49] iwlwifi: remove HT flags from RXON when not in HT anymore Greg KH
2008-11-12  0:23   ` [patch 31/49] iwlwifi: dont fail if scan is issued too early Greg KH
2008-11-12  0:24   ` [patch 32/49] iwlwifi: use correct DMA_MASK Greg KH
2008-11-12  0:24   ` [patch 33/49] iwlwifi: fix suspend to RAM in iwlwifi Greg KH
2008-11-12  0:24   ` [patch 34/49] iwlwifi: generic init calibrations framework Greg KH
2008-11-12  0:24   ` [patch 35/49] zd1211rw: Add 2 device IDs Greg KH
2008-11-12  0:24   ` [patch 36/49] iwl3945: fix deadlock on suspend Greg KH
2008-11-12  0:24   ` [patch 37/49] iwl3945: do not send scan command if channel count zero Greg KH
2008-11-12  0:24   ` [patch 38/49] cpqarry: fix return value of cpqarray_init() Greg KH
2008-11-12  0:24   ` [patch 39/49] ACPI: dock: avoid check _STA method Greg KH
2008-11-12  0:24   ` [patch 40/49] ARM: 5300/1: fixup spitz reset during boot Greg KH
2008-11-12  0:24   ` [patch 41/49] KEYS: Make request key instantiate the per-user keyrings Greg KH
2008-11-12  0:24   ` [patch 42/49] libata: fix last_reset timestamp handling Greg KH
2008-11-12  0:24   ` [patch 43/49] ALSA: hda: make a STAC_DELL_EQ option Greg KH
2008-11-12  0:24   ` [patch 44/49] Fix __pfn_to_page(pfn) for CONFIG_DISCONTIGMEM=y Greg KH
2008-11-12  0:24   ` [patch 45/49] mmc: increase SD write timeout for crappy cards Greg KH
2008-11-12  0:24   ` [patch 46/49] hfsplus: fix Buffer overflow with a corrupted image (CVE-2008-4933) Greg KH
2008-11-12  0:24   ` [patch 47/49] hfsplus: check read_mapping_page() return value (CVE-2008-4934) Greg KH
2008-11-12  0:24   ` [patch 48/49] hfs: fix namelength memory corruption (CVE-2008-5025) Greg KH
2008-11-12  0:24   ` [patch 49/49] HID: fix incorrent length condition in hidraw_write() Greg KH
2008-11-12  0:44   ` [patch 00/49] 2.6.27.5 stable review Gabriel C
2008-11-12  1:07     ` Greg KH
2008-11-12  0:54   ` Willy Tarreau
2008-11-12 14:08   ` Frans Pop
2008-11-12 17:03     ` [stable] " Greg KH
2008-11-13 22:07     ` Greg KH

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20081112002335.GU10989@kroah.com \
    --to=gregkh@suse.de \
    --cc=akpm@linux-foundation.org \
    --cc=alan@lxorguk.ukuu.org.uk \
    --cc=cavokz@gmail.com \
    --cc=cebbert@redhat.com \
    --cc=chuckw@quantumlinux.com \
    --cc=davej@redhat.com \
    --cc=eteo@redhat.com \
    --cc=jake@lwn.net \
    --cc=jmforbes@linuxtx.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mkrufky@linuxtv.org \
    --cc=mszeredi@suse.cz \
    --cc=rbranco@la.checkpoint.com \
    --cc=rdunlap@xenotime.net \
    --cc=reviews@ml.cw.f00f.org \
    --cc=stable@kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=tytso@mit.edu \
    --cc=w@1wt.eu \
    --cc=zwane@arm.linux.org.uk \
    --subject='Re: [patch 20/49] net: unix: fix inflight counting bug in garbage collector' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).