LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Greg KH <gregkh@suse.de>
To: linux-kernel@vger.kernel.org, stable@kernel.org
Cc: Justin Forbes <jmforbes@linuxtx.org>,
Zwane Mwaikambo <zwane@arm.linux.org.uk>,
"Theodore Ts'o" <tytso@mit.edu>,
Randy Dunlap <rdunlap@xenotime.net>,
Dave Jones <davej@redhat.com>,
Chuck Wolber <chuckw@quantumlinux.com>,
Chris Wedgwood <reviews@ml.cw.f00f.org>,
Michael Krufky <mkrufky@linuxtv.org>,
Chuck Ebbert <cebbert@redhat.com>,
Domenico Andreoli <cavokz@gmail.com>, Willy Tarreau <w@1wt.eu>,
Rodrigo Rubira Branco <rbranco@la.checkpoint.com>,
Jake Edge <jake@lwn.net>, Eugene Teo <eteo@redhat.com>,
torvalds@linux-foundation.org, akpm@linux-foundation.org,
alan@lxorguk.ukuu.org.uk, Miklos Szeredi <mszeredi@suse.cz>
Subject: [patch 20/49] net: unix: fix inflight counting bug in garbage collector
Date: Tue, 11 Nov 2008 16:23:35 -0800 [thread overview]
Message-ID: <20081112002335.GU10989@kroah.com> (raw)
In-Reply-To: <20081112002215.GA10989@kroah.com>
[-- Attachment #1: net-unix-fix-inflight-counting-bug-in-garbage-collector.patch --]
[-- Type: text/plain, Size: 6500 bytes --]
2.6.27-stable review patch. If anyone has any objections, please let us know.
------------------
From: Miklos Szeredi <mszeredi@suse.cz>
commit 6209344f5a3795d34b7f2c0061f49802283b6bdd upstream
Previously I assumed that the receive queues of candidates don't
change during the GC. This is only half true, nothing can be received
from the queues (see comment in unix_gc()), but buffers could be added
through the other half of the socket pair, which may still have file
descriptors referring to it.
This can result in inc_inflight_move_tail() erronously increasing the
"inflight" counter for a unix socket for which dec_inflight() wasn't
previously called. This in turn can trigger the "BUG_ON(total_refs <
inflight_refs)" in a later garbage collection run.
Fix this by only manipulating the "inflight" counter for sockets which
are candidates themselves. Duplicating the file references in
unix_attach_fds() is also needed to prevent a socket becoming a
candidate for GC while the skb that contains it is not yet queued.
Reported-by: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
include/net/af_unix.h | 1 +
net/unix/af_unix.c | 31 ++++++++++++++++++++++++-------
net/unix/garbage.c | 49 +++++++++++++++++++++++++++++++++++++------------
3 files changed, 62 insertions(+), 19 deletions(-)
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -54,6 +54,7 @@ struct unix_sock {
atomic_long_t inflight;
spinlock_t lock;
unsigned int gc_candidate : 1;
+ unsigned int gc_maybe_cycle : 1;
wait_queue_head_t peer_wait;
};
#define unix_sk(__sk) ((struct unix_sock *)__sk)
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1300,14 +1300,23 @@ static void unix_destruct_fds(struct sk_
sock_wfree(skb);
}
-static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
int i;
+
+ /*
+ * Need to duplicate file references for the sake of garbage
+ * collection. Otherwise a socket in the fps might become a
+ * candidate for GC while the skb is not yet queued.
+ */
+ UNIXCB(skb).fp = scm_fp_dup(scm->fp);
+ if (!UNIXCB(skb).fp)
+ return -ENOMEM;
+
for (i=scm->fp->count-1; i>=0; i--)
unix_inflight(scm->fp->fp[i]);
- UNIXCB(skb).fp = scm->fp;
skb->destructor = unix_destruct_fds;
- scm->fp = NULL;
+ return 0;
}
/*
@@ -1366,8 +1375,11 @@ static int unix_dgram_sendmsg(struct kio
goto out;
memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
- if (siocb->scm->fp)
- unix_attach_fds(siocb->scm, skb);
+ if (siocb->scm->fp) {
+ err = unix_attach_fds(siocb->scm, skb);
+ if (err)
+ goto out_free;
+ }
unix_get_secdata(siocb->scm, skb);
skb_reset_transport_header(skb);
@@ -1536,8 +1548,13 @@ static int unix_stream_sendmsg(struct ki
size = min_t(int, size, skb_tailroom(skb));
memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
- if (siocb->scm->fp)
- unix_attach_fds(siocb->scm, skb);
+ if (siocb->scm->fp) {
+ err = unix_attach_fds(siocb->scm, skb);
+ if (err) {
+ kfree_skb(skb);
+ goto out_err;
+ }
+ }
if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
kfree_skb(skb);
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -186,8 +186,17 @@ static void scan_inflight(struct sock *x
*/
struct sock *sk = unix_get_socket(*fp++);
if (sk) {
- hit = true;
- func(unix_sk(sk));
+ struct unix_sock *u = unix_sk(sk);
+
+ /*
+ * Ignore non-candidates, they could
+ * have been added to the queues after
+ * starting the garbage collection
+ */
+ if (u->gc_candidate) {
+ hit = true;
+ func(u);
+ }
}
}
if (hit && hitlist != NULL) {
@@ -249,11 +258,11 @@ static void inc_inflight_move_tail(struc
{
atomic_long_inc(&u->inflight);
/*
- * If this is still a candidate, move it to the end of the
- * list, so that it's checked even if it was already passed
- * over
+ * If this still might be part of a cycle, move it to the end
+ * of the list, so that it's checked even if it was already
+ * passed over
*/
- if (u->gc_candidate)
+ if (u->gc_maybe_cycle)
list_move_tail(&u->link, &gc_candidates);
}
@@ -267,6 +276,7 @@ void unix_gc(void)
struct unix_sock *next;
struct sk_buff_head hitlist;
struct list_head cursor;
+ LIST_HEAD(not_cycle_list);
spin_lock(&unix_gc_lock);
@@ -282,10 +292,14 @@ void unix_gc(void)
*
* Holding unix_gc_lock will protect these candidates from
* being detached, and hence from gaining an external
- * reference. This also means, that since there are no
- * possible receivers, the receive queues of these sockets are
- * static during the GC, even though the dequeue is done
- * before the detach without atomicity guarantees.
+ * reference. Since there are no possible receivers, all
+ * buffers currently on the candidates' queues stay there
+ * during the garbage collection.
+ *
+ * We also know that no new candidate can be added onto the
+ * receive queues. Other, non candidate sockets _can_ be
+ * added to queue, so we must make sure only to touch
+ * candidates.
*/
list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
long total_refs;
@@ -299,6 +313,7 @@ void unix_gc(void)
if (total_refs == inflight_refs) {
list_move_tail(&u->link, &gc_candidates);
u->gc_candidate = 1;
+ u->gc_maybe_cycle = 1;
}
}
@@ -325,14 +340,24 @@ void unix_gc(void)
list_move(&cursor, &u->link);
if (atomic_long_read(&u->inflight) > 0) {
- list_move_tail(&u->link, &gc_inflight_list);
- u->gc_candidate = 0;
+ list_move_tail(&u->link, ¬_cycle_list);
+ u->gc_maybe_cycle = 0;
scan_children(&u->sk, inc_inflight_move_tail, NULL);
}
}
list_del(&cursor);
/*
+ * not_cycle_list contains those sockets which do not make up a
+ * cycle. Restore these to the inflight list.
+ */
+ while (!list_empty(¬_cycle_list)) {
+ u = list_entry(not_cycle_list.next, struct unix_sock, link);
+ u->gc_candidate = 0;
+ list_move_tail(&u->link, &gc_inflight_list);
+ }
+
+ /*
* Now gc_candidates contains only garbage. Restore original
* inflight counters for these as well, and remove the skbuffs
* which are creating the cycle(s).
--
next prev parent reply other threads:[~2008-11-12 0:33 UTC|newest]
Thread overview: 56+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20081112001401.926965113@mini.kroah.org>
2008-11-12 0:22 ` [patch 00/49] 2.6.27.5 stable review Greg KH
2008-11-12 0:22 ` [patch 01/49] ext3: wait on all pending commits in ext3_sync_fs Greg KH
2008-11-12 0:22 ` [patch 02/49] x86: add DMI quirk for AMI BIOS which corrupts address 0xc000 during resume Greg KH
2008-11-12 0:22 ` [patch 03/49] x86: reserve low 64K on AMI and Phoenix BIOS boxen Greg KH
2008-11-12 0:22 ` [patch 04/49] x86: add X86_RESERVE_LOW_64K Greg KH
2008-11-12 0:23 ` [patch 05/49] x86: fix CONFIG_X86_RESERVE_LOW_64K=y Greg KH
2008-11-12 0:23 ` [patch 06/49] x86: fix macro with bad_bios_dmi_table Greg KH
2008-11-12 0:23 ` [patch 07/49] cgroups: fix invalid cgrp->dentry before cgroup has been completely removed Greg KH
2008-11-12 0:23 ` [patch 08/49] hugetlb: pull gigantic page initialisation out of the default path Greg KH
2008-11-12 0:23 ` [patch 09/49] hugetlbfs: handle pages higher order than MAX_ORDER Greg KH
2008-11-12 0:23 ` [patch 10/49] cciss: fix regression firmware not displayed in procfs Greg KH
2008-11-12 0:23 ` [patch 11/49] cciss: fix sysfs broken symlink regression Greg KH
2008-11-12 0:23 ` [patch 12/49] cciss: new hardware support Greg KH
2008-11-12 0:23 ` [patch 13/49] md: linear: Fix a division by zero bug for very small arrays Greg KH
2008-11-12 0:23 ` [patch 14/49] md: fix bug in raid10 recovery Greg KH
2008-11-12 0:23 ` [patch 15/49] JFFS2: fix race condition in jffs2_lzo_compress() Greg KH
2008-11-12 0:23 ` [patch 16/49] JFFS2: Fix lack of locking in thread_should_wake() Greg KH
2008-11-12 0:23 ` [patch 17/49] ARM: xsc3: fix xsc3_l2_inv_range Greg KH
2008-11-12 0:23 ` [patch 18/49] MTD: Fix cfi_send_gen_cmd handling of x16 devices in x8 mode (v4) Greg KH
2008-11-12 0:23 ` [patch 19/49] x86: dont use tsc_khz to calculate lpj if notsc is passed Greg KH
2008-11-12 0:23 ` Greg KH [this message]
2008-11-12 0:23 ` [patch 21/49] r8169: get ethtool settings through the generic mii helper Greg KH
2008-11-12 0:23 ` [patch 22/49] r8169: fix RxMissed register access Greg KH
2008-11-12 0:23 ` [patch 23/49] r8169: wake up the PHY of the 8168 Greg KH
2008-11-12 0:23 ` [patch 24/49] I/OAT: fix channel resources free for not allocated channels Greg KH
2008-11-12 0:23 ` [patch 25/49] I/OAT: fix dma_pin_iovec_pages() error handling Greg KH
2008-11-12 0:23 ` [patch 26/49] I/OAT: fix async_tx.callback checking Greg KH
2008-11-12 0:23 ` [patch 27/49] dca: fixup initialization dependency Greg KH
2008-11-12 0:23 ` [patch 28/49] iwlwifi: allow consecutive scans in unassociated state Greg KH
2008-11-12 0:23 ` [patch 29/49] iwlwifi: allow association on radar channel in power save Greg KH
2008-11-12 0:23 ` [patch 30/49] iwlwifi: remove HT flags from RXON when not in HT anymore Greg KH
2008-11-12 0:23 ` [patch 31/49] iwlwifi: dont fail if scan is issued too early Greg KH
2008-11-12 0:24 ` [patch 32/49] iwlwifi: use correct DMA_MASK Greg KH
2008-11-12 0:24 ` [patch 33/49] iwlwifi: fix suspend to RAM in iwlwifi Greg KH
2008-11-12 0:24 ` [patch 34/49] iwlwifi: generic init calibrations framework Greg KH
2008-11-12 0:24 ` [patch 35/49] zd1211rw: Add 2 device IDs Greg KH
2008-11-12 0:24 ` [patch 36/49] iwl3945: fix deadlock on suspend Greg KH
2008-11-12 0:24 ` [patch 37/49] iwl3945: do not send scan command if channel count zero Greg KH
2008-11-12 0:24 ` [patch 38/49] cpqarry: fix return value of cpqarray_init() Greg KH
2008-11-12 0:24 ` [patch 39/49] ACPI: dock: avoid check _STA method Greg KH
2008-11-12 0:24 ` [patch 40/49] ARM: 5300/1: fixup spitz reset during boot Greg KH
2008-11-12 0:24 ` [patch 41/49] KEYS: Make request key instantiate the per-user keyrings Greg KH
2008-11-12 0:24 ` [patch 42/49] libata: fix last_reset timestamp handling Greg KH
2008-11-12 0:24 ` [patch 43/49] ALSA: hda: make a STAC_DELL_EQ option Greg KH
2008-11-12 0:24 ` [patch 44/49] Fix __pfn_to_page(pfn) for CONFIG_DISCONTIGMEM=y Greg KH
2008-11-12 0:24 ` [patch 45/49] mmc: increase SD write timeout for crappy cards Greg KH
2008-11-12 0:24 ` [patch 46/49] hfsplus: fix Buffer overflow with a corrupted image (CVE-2008-4933) Greg KH
2008-11-12 0:24 ` [patch 47/49] hfsplus: check read_mapping_page() return value (CVE-2008-4934) Greg KH
2008-11-12 0:24 ` [patch 48/49] hfs: fix namelength memory corruption (CVE-2008-5025) Greg KH
2008-11-12 0:24 ` [patch 49/49] HID: fix incorrent length condition in hidraw_write() Greg KH
2008-11-12 0:44 ` [patch 00/49] 2.6.27.5 stable review Gabriel C
2008-11-12 1:07 ` Greg KH
2008-11-12 0:54 ` Willy Tarreau
2008-11-12 14:08 ` Frans Pop
2008-11-12 17:03 ` [stable] " Greg KH
2008-11-13 22:07 ` Greg KH
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20081112002335.GU10989@kroah.com \
--to=gregkh@suse.de \
--cc=akpm@linux-foundation.org \
--cc=alan@lxorguk.ukuu.org.uk \
--cc=cavokz@gmail.com \
--cc=cebbert@redhat.com \
--cc=chuckw@quantumlinux.com \
--cc=davej@redhat.com \
--cc=eteo@redhat.com \
--cc=jake@lwn.net \
--cc=jmforbes@linuxtx.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mkrufky@linuxtv.org \
--cc=mszeredi@suse.cz \
--cc=rbranco@la.checkpoint.com \
--cc=rdunlap@xenotime.net \
--cc=reviews@ml.cw.f00f.org \
--cc=stable@kernel.org \
--cc=torvalds@linux-foundation.org \
--cc=tytso@mit.edu \
--cc=w@1wt.eu \
--cc=zwane@arm.linux.org.uk \
--subject='Re: [patch 20/49] net: unix: fix inflight counting bug in garbage collector' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).