LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [RFC, PATCH] fix SEM_UNDO with namespaces
@ 2008-03-30 20:50 Manfred Spraul
  2008-03-31  7:12 ` Pavel Emelyanov
  0 siblings, 1 reply; 17+ messages in thread
From: Manfred Spraul @ 2008-03-30 20:50 UTC (permalink / raw)
  To: Linux Kernel Mailing List; +Cc: Andrew Morton, Pavel Emelyanov

[-- Attachment #1: Type: text/plain, Size: 516 bytes --]

Hi,

the attached patch should fix the combination of CLONE_NEWIPC with 
shared sysv undo structures (the common case, just 
sys_unshare(CLONE_NEWIPC)):
lookup_undo() now locates the undo array based on both semid and the 
namespace pointer.
Additionally, the patch tries to clean the code up by using the linked 
list macros from <linux/list.h> instead of single linked lists.

The patch passes a few quick tests, I'm interested in feedback. Are 
there test apps for testing the IPC namespace code?

--
    Manfred

[-- Attachment #2: patch-semundo-namespace --]
[-- Type: text/plain, Size: 10344 bytes --]

diff --git a/include/linux/sem.h b/include/linux/sem.h
index c8eaad9..8c110ec 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -95,7 +95,7 @@ struct sem_array {
 	struct sem		*sem_base;	/* ptr to first semaphore in array */
 	struct sem_queue	*sem_pending;	/* pending operations to be processed */
 	struct sem_queue	**sem_pending_last; /* last pending operation */
-	struct sem_undo		*undo;		/* undo requests on this array */
+	struct list_head	list_id;	/* undo requests on this array */
 	unsigned long		sem_nsems;	/* no. of semaphores in array */
 };
 
@@ -118,9 +118,10 @@ struct sem_queue {
  * when the process exits.
  */
 struct sem_undo {
-	struct sem_undo *	proc_next;	/* next entry on this process */
-	struct sem_undo *	id_next;	/* next entry on this semaphore set */
+	struct list_head	list_proc;	/* per-process list: all undos from one process */
+	struct list_head	list_id;	/* per semaphore array list: all undos for one array */
 	int			semid;		/* semaphore set identifier */
+	struct ipc_namespace	*ns;		/* namespace */
 	short *			semadj;		/* array of adjustments, one per semaphore */
 };
 
@@ -128,9 +129,9 @@ struct sem_undo {
  * that may be shared among all a CLONE_SYSVSEM task group.
  */ 
 struct sem_undo_list {
-	atomic_t	refcnt;
-	spinlock_t	lock;
-	struct sem_undo	*proc_list;
+	atomic_t		refcnt;
+	spinlock_t		lock;
+	struct list_head	list_proc;
 };
 
 struct sysv_sem {
diff --git a/ipc/sem.c b/ipc/sem.c
index 0b45a4d..2986817 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -272,7 +272,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	sma->sem_base = (struct sem *) &sma[1];
 	/* sma->sem_pending = NULL; */
 	sma->sem_pending_last = &sma->sem_pending;
-	/* sma->undo = NULL; */
+	INIT_LIST_HEAD(&sma->list_id);
 	sma->sem_nsems = nsems;
 	sma->sem_ctime = get_seconds();
 	sem_unlock(sma);
@@ -534,7 +534,8 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 	 * (They will be freed without any further action in exit_sem()
 	 * or during the next semop.)
 	 */
-	for (un = sma->undo; un; un = un->id_next)
+	assert_spin_locked(&sma->sem_perm.lock);
+	list_for_each_entry(un, &sma->list_id, list_id)
 		un->semid = -1;
 
 	/* Wake up all pending processes and let them fail with EIDRM. */
@@ -773,9 +774,12 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 
 		for (i = 0; i < nsems; i++)
 			sma->sem_base[i].semval = sem_io[i];
-		for (un = sma->undo; un; un = un->id_next)
+
+		assert_spin_locked(&sma->sem_perm.lock);
+		list_for_each_entry(un, &sma->list_id, list_id) {
 			for (i = 0; i < nsems; i++)
 				un->semadj[i] = 0;
+		}
 		sma->sem_ctime = get_seconds();
 		/* maybe some queued-up processes were waiting for this */
 		update_queue(sma);
@@ -807,12 +811,15 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 	{
 		int val = arg.val;
 		struct sem_undo *un;
+
 		err = -ERANGE;
 		if (val > SEMVMX || val < 0)
 			goto out_unlock;
 
-		for (un = sma->undo; un; un = un->id_next)
+		assert_spin_locked(&sma->sem_perm.lock);
+		list_for_each_entry(un, &sma->list_id, list_id)
 			un->semadj[semnum] = 0;
+
 		curr->semval = val;
 		curr->sempid = task_tgid_vnr(current);
 		sma->sem_ctime = get_seconds();
@@ -994,33 +1001,40 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp)
 			return -ENOMEM;
 		spin_lock_init(&undo_list->lock);
 		atomic_set(&undo_list->refcnt, 1);
+		INIT_LIST_HEAD(&undo_list->list_proc);
+		
 		current->sysvsem.undo_list = undo_list;
 	}
 	*undo_listp = undo_list;
 	return 0;
 }
 
-static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
+static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, struct ipc_namespace *ns, int semid)
 {
-	struct sem_undo **last, *un;
-
-	last = &ulp->proc_list;
-	un = *last;
-	while(un != NULL) {
-		if(un->semid==semid)
-			break;
-		if(un->semid==-1) {
-			*last=un->proc_next;
-			kfree(un);
-		} else {
-			last=&un->proc_next;
+	struct sem_undo *walk, *tmp;
+
+	assert_spin_locked(&ulp->lock);
+	list_for_each_entry_safe(walk, tmp, &ulp->list_proc, list_proc) {
+		if(walk->semid==semid && walk->ns == ns)
+			return walk;
+		if(walk->semid==-1) {
+			list_del(&walk->list_proc);
+			kfree(walk);
 		}
-		un=*last;
 	}
-	return un;
+	return NULL;
 }
 
-static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
+/**
+ * find_alloc_undo - Lookup (and if not present create) undo array
+ * @ns: namespace
+ * @semid: semaphore array id
+ *
+ * The function looks up (and if not present creates) the undo structure.
+ * The size of the undo structure depends on the size of the semaphore
+ * array, thus the alloc path is not that straightforward.
+ */
+static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 {
 	struct sem_array *sma;
 	struct sem_undo_list *ulp;
@@ -1033,12 +1047,13 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
 		return ERR_PTR(error);
 
 	spin_lock(&ulp->lock);
-	un = lookup_undo(ulp, semid);
+	un = lookup_undo(ulp, ns, semid);
 	spin_unlock(&ulp->lock);
 	if (likely(un!=NULL))
 		goto out;
 
 	/* no undo structure around - allocate one. */
+	/* step 1: figure out the size of the semaphore array */
 	sma = sem_lock_check(ns, semid);
 	if (IS_ERR(sma))
 		return ERR_PTR(PTR_ERR(sma));
@@ -1047,41 +1062,43 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
 	ipc_rcu_getref(sma);
 	sem_unlock(sma);
 
+	/* step 2: allocate new undo structure */
 	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
-	if (!new) {
-		ipc_lock_by_ptr(&sma->sem_perm);
-		ipc_rcu_putref(sma);
-		sem_unlock(sma);
-		return ERR_PTR(-ENOMEM);
-	}
-	new->semadj = (short *) &new[1];
-	new->semid = semid;
 
+	/* step 3: reacquire all locks */
 	spin_lock(&ulp->lock);
-	un = lookup_undo(ulp, semid);
-	if (un) {
-		spin_unlock(&ulp->lock);
-		kfree(new);
-		ipc_lock_by_ptr(&sma->sem_perm);
-		ipc_rcu_putref(sma);
-		sem_unlock(sma);
-		goto out;
-	}
 	ipc_lock_by_ptr(&sma->sem_perm);
 	ipc_rcu_putref(sma);
-	if (sma->sem_perm.deleted) {
-		sem_unlock(sma);
-		spin_unlock(&ulp->lock);
+
+	/* step 4: check for failures: out of memory, array got removed */
+	un = ERR_PTR(-ENOMEM);
+	if (!new)
+		goto out_unlock;
+
+	un = ERR_PTR(-EIDRM);
+	if (sma->sem_perm.deleted)
+		goto out_unlock_free;
+
+	un = lookup_undo(ulp, ns, semid);
+	if (un) {
+out_unlock_free:
+		/* step 5a: another thread was faster, discard our structure */
 		kfree(new);
-		un = ERR_PTR(-EIDRM);
-		goto out;
+	} else {
+		/* step 5b: initialize & insert the new undo structure into all lists */
+		new->semadj = (short *) &new[1];
+		new->semid = semid;
+		new->ns = ns;
+
+		assert_spin_locked(&ulp->lock);
+		list_add(&new->list_proc, &ulp->list_proc);
+		assert_spin_locked(&sma->sem_perm.lock);
+		list_add(&new->list_id, &sma->list_id);
+
+		un = new;
 	}
-	new->proc_next = ulp->proc_list;
-	ulp->proc_list = new;
-	new->id_next = sma->undo;
-	sma->undo = new;
+out_unlock:
 	sem_unlock(sma);
-	un = new;
 	spin_unlock(&ulp->lock);
 out:
 	return un;
@@ -1138,9 +1155,8 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
 			alter = 1;
 	}
 
-retry_undos:
 	if (undos) {
-		un = find_undo(ns, semid);
+		un = find_alloc_undo(ns, semid);
 		if (IS_ERR(un)) {
 			error = PTR_ERR(un);
 			goto out_free;
@@ -1155,14 +1171,14 @@ retry_undos:
 	}
 
 	/*
-	 * semid identifiers are not unique - find_undo may have
+	 * semid identifiers are not unique - find_alloc_undo may have
 	 * allocated an undo structure, it was invalidated by an RMID
-	 * and now a new array with received the same id. Check and retry.
+	 * and now a new array with received the same id. Check and fail.
 	 */
-	if (un && un->semid == -1) {
-		sem_unlock(sma);
-		goto retry_undos;
-	}
+	error = -EIDRM;
+	if (un && un->semid == -1)
+		goto out_unlock_free;
+
 	error = -EFBIG;
 	if (max >= sma->sem_nsems)
 		goto out_unlock_free;
@@ -1291,55 +1307,41 @@ int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
  */
 void exit_sem(struct task_struct *tsk)
 {
-	struct sem_undo_list *undo_list;
-	struct sem_undo *u, **up;
-	struct ipc_namespace *ns;
+	struct sem_undo_list *ulp;
+	struct sem_undo *un, *tmp;
 
-	undo_list = tsk->sysvsem.undo_list;
-	if (!undo_list)
+	ulp = tsk->sysvsem.undo_list;
+	if (!ulp)
 		return;
 
-	if (!atomic_dec_and_test(&undo_list->refcnt))
+	if (!atomic_dec_and_test(&ulp->refcnt))
 		return;
 
-	ns = tsk->nsproxy->ipc_ns;
-	/* There's no need to hold the semundo list lock, as current
-         * is the last task exiting for this undo list.
-	 */
-	for (up = &undo_list->proc_list; (u = *up); *up = u->proc_next, kfree(u)) {
+	spin_lock(&ulp->lock);
+	list_for_each_entry_safe(un, tmp, &ulp->list_proc, list_proc) {
 		struct sem_array *sma;
-		int nsems, i;
-		struct sem_undo *un, **unp;
-		int semid;
-	       
-		semid = u->semid;
-
-		if(semid == -1)
-			continue;
-		sma = sem_lock(ns, semid);
+		int i;
+
+		if(un->semid == -1)
+			goto free;
+		sma = sem_lock(un->ns, un->semid);
 		if (IS_ERR(sma))
-			continue;
+			goto free;
 
-		if (u->semid == -1)
+		if (un->semid == -1)
 			goto next_entry;
 
-		BUG_ON(sem_checkid(sma, u->semid));
+		BUG_ON(sem_checkid(sma, un->semid));
 
-		/* remove u from the sma->undo list */
-		for (unp = &sma->undo; (un = *unp); unp = &un->id_next) {
-			if (u == un)
-				goto found;
-		}
-		printk ("exit_sem undo list error id=%d\n", u->semid);
-		goto next_entry;
-found:
-		*unp = un->id_next;
-		/* perform adjustments registered in u */
-		nsems = sma->sem_nsems;
-		for (i = 0; i < nsems; i++) {
+		/* remove un from sma->list_id */
+		assert_spin_locked(&sma->sem_perm.lock);
+		list_del(&un->list_id);
+
+		/* perform adjustments registered in un */
+		for (i = 0; i < sma->sem_nsems; i++) {
 			struct sem * semaphore = &sma->sem_base[i];
-			if (u->semadj[i]) {
-				semaphore->semval += u->semadj[i];
+			if (un->semadj[i]) {
+				semaphore->semval += un->semadj[i];
 				/*
 				 * Range checks of the new semaphore value,
 				 * not defined by sus:
@@ -1365,8 +1367,12 @@ found:
 		update_queue(sma);
 next_entry:
 		sem_unlock(sma);
+free:
+		list_del(&un->list_proc);
+		kfree(un);
 	}
-	kfree(undo_list);
+	spin_unlock(&ulp->lock);
+	kfree(ulp);
 }
 
 #ifdef CONFIG_PROC_FS

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [RFC, PATCH] fix SEM_UNDO with namespaces
  2008-03-30 20:50 [RFC, PATCH] fix SEM_UNDO with namespaces Manfred Spraul
@ 2008-03-31  7:12 ` Pavel Emelyanov
  2008-03-31 16:14   ` Manfred Spraul
  0 siblings, 1 reply; 17+ messages in thread
From: Pavel Emelyanov @ 2008-03-31  7:12 UTC (permalink / raw)
  To: Manfred Spraul; +Cc: Linux Kernel Mailing List, Andrew Morton

Manfred Spraul wrote:
> Hi,
> 
> the attached patch should fix the combination of CLONE_NEWIPC with 
> shared sysv undo structures (the common case, just 
> sys_unshare(CLONE_NEWIPC)):
> lookup_undo() now locates the undo array based on both semid and the 
> namespace pointer.

If you start using any IPC object and then call unshare with CLONE_NEWIPC,
then it's your problem, but not the kernel. There are many issues that can
become broken from the user-level POV.

What's the problem with undo list? Does it become irrelevant and no undos
happen after the task dies?

I agree, that we should probably destroy this one when the task calls 
unshare, but trying to keep this list relevant is useless.

> Additionally, the patch tries to clean the code up by using the linked 
> list macros from <linux/list.h> instead of single linked lists.

May I ask you to split the patch into atomic parts, rather than
mixing fixes, reworks and cleanups together?

> The patch passes a few quick tests, I'm interested in feedback. Are 
> there test apps for testing the IPC namespace code?

This code is a part of OpenVZ kernels, so it passes some internal
tests we perform.

> --
>     Manfred
> 

Thanks,
Pavel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC, PATCH] fix SEM_UNDO with namespaces
  2008-03-31  7:12 ` Pavel Emelyanov
@ 2008-03-31 16:14   ` Manfred Spraul
  2008-04-01  9:44     ` Pavel Emelyanov
  0 siblings, 1 reply; 17+ messages in thread
From: Manfred Spraul @ 2008-03-31 16:14 UTC (permalink / raw)
  To: Pavel Emelyanov; +Cc: Linux Kernel Mailing List, Andrew Morton

Pavel Emelyanov wrote:
> Manfred Spraul wrote:
>   
>> Hi,
>>
>> the attached patch should fix the combination of CLONE_NEWIPC with 
>> shared sysv undo structures (the common case, just 
>> sys_unshare(CLONE_NEWIPC)):
>> lookup_undo() now locates the undo array based on both semid and the 
>> namespace pointer.
>>     
>
> If you start using any IPC object and then call unshare with CLONE_NEWIPC,
> then it's your problem, but not the kernel.
>   
The result is a kernel memory corruption, and kernel memory corruptions 
are always the kernel's problem.

The code assumed that a semaphore id is globally unique. With 
namespaces,  this is not true anymore.
If two semaphore arrays exist with the same id, but different sizes, 
then semops will cause memory corruptions: The undo structure contains 
one element for each semaphore, thus the semop will write behind the end 
of the memory allocation.

> I agree, that we should probably destroy this one when the task calls 
> unshare, but trying to keep this list relevant is useless.
>   
A very tricky question: Let's assume we have a process with two threads.
The undo structure is shared, as per opengroup standard.
Now one thread calls unshare(CLONE_NEWIPC). What should happen? We 
cannot destroy the undo structure, the other thread might be still 
interested in it.
If we allow sys_unshare() for multithreaded processes with CLONE_NEWIPC 
and without CLONE_SYSVSEM, then we must handle this case.

--
    Manfred

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC, PATCH] fix SEM_UNDO with namespaces
  2008-03-31 16:14   ` Manfred Spraul
@ 2008-04-01  9:44     ` Pavel Emelyanov
  2008-04-01 14:15       ` Serge E. Hallyn
  2008-04-01 15:25       ` Eric W. Biederman
  0 siblings, 2 replies; 17+ messages in thread
From: Pavel Emelyanov @ 2008-04-01  9:44 UTC (permalink / raw)
  To: Manfred Spraul
  Cc: Linux Kernel Mailing List, Andrew Morton, Serge Hallyn,
	Sukadev Bhattiprolu, Eric W. Biederman

Manfred Spraul wrote:
> Pavel Emelyanov wrote:
>> Manfred Spraul wrote:
>>   
>>> Hi,
>>>
>>> the attached patch should fix the combination of CLONE_NEWIPC with 
>>> shared sysv undo structures (the common case, just 
>>> sys_unshare(CLONE_NEWIPC)):
>>> lookup_undo() now locates the undo array based on both semid and the 
>>> namespace pointer.
>>>     
>> If you start using any IPC object and then call unshare with CLONE_NEWIPC,
>> then it's your problem, but not the kernel.
>>   
> The result is a kernel memory corruption, and kernel memory corruptions 
> are always the kernel's problem.

Agree. Must be fixed, but I'm not sure we should try handling this
case by trying to de-op semaphores for former task namespace. I think
that destroying this list or returning -EBUSY for this case is OK.

> The code assumed that a semaphore id is globally unique. With 
> namespaces,  this is not true anymore.
> If two semaphore arrays exist with the same id, but different sizes, 
> then semops will cause memory corruptions: The undo structure contains 
> one element for each semaphore, thus the semop will write behind the end 
> of the memory allocation.
> 
>> I agree, that we should probably destroy this one when the task calls 
>> unshare, but trying to keep this list relevant is useless.
>>   
> A very tricky question: Let's assume we have a process with two threads.
> The undo structure is shared, as per opengroup standard.
> Now one thread calls unshare(CLONE_NEWIPC). What should happen? We 
> cannot destroy the undo structure, the other thread might be still 
> interested in it.
> If we allow sys_unshare() for multithreaded processes with CLONE_NEWIPC 
> and without CLONE_SYSVSEM, then we must handle this case.

Hm... I'd simply disable creating any new namespaces for threads.
I think other namespaces developers agree with me. Serge, Suka, Eric
what do you think?

> --
>     Manfred
> 


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC, PATCH] fix SEM_UNDO with namespaces
  2008-04-01  9:44     ` Pavel Emelyanov
@ 2008-04-01 14:15       ` Serge E. Hallyn
  2008-04-03 19:04         ` Andrew Morton
  2008-04-01 15:25       ` Eric W. Biederman
  1 sibling, 1 reply; 17+ messages in thread
From: Serge E. Hallyn @ 2008-04-01 14:15 UTC (permalink / raw)
  To: Pavel Emelyanov
  Cc: Manfred Spraul, Linux Kernel Mailing List, Andrew Morton,
	Serge Hallyn, Sukadev Bhattiprolu, Eric W. Biederman

Quoting Pavel Emelyanov (xemul@openvz.org):
> Manfred Spraul wrote:
> > Pavel Emelyanov wrote:
> >> Manfred Spraul wrote:
> >>   
> >>> Hi,
> >>>
> >>> the attached patch should fix the combination of CLONE_NEWIPC with 
> >>> shared sysv undo structures (the common case, just 
> >>> sys_unshare(CLONE_NEWIPC)):
> >>> lookup_undo() now locates the undo array based on both semid and the 
> >>> namespace pointer.
> >>>     
> >> If you start using any IPC object and then call unshare with CLONE_NEWIPC,
> >> then it's your problem, but not the kernel.
> >>   
> > The result is a kernel memory corruption, and kernel memory corruptions 
> > are always the kernel's problem.
> 
> Agree. Must be fixed, but I'm not sure we should try handling this
> case by trying to de-op semaphores for former task namespace. I think
> that destroying this list or returning -EBUSY for this case is OK.
> 
> > The code assumed that a semaphore id is globally unique. With 
> > namespaces,  this is not true anymore.
> > If two semaphore arrays exist with the same id, but different sizes, 
> > then semops will cause memory corruptions: The undo structure contains 
> > one element for each semaphore, thus the semop will write behind the end 
> > of the memory allocation.
> > 
> >> I agree, that we should probably destroy this one when the task calls 
> >> unshare, but trying to keep this list relevant is useless.
> >>   
> > A very tricky question: Let's assume we have a process with two threads.
> > The undo structure is shared, as per opengroup standard.
> > Now one thread calls unshare(CLONE_NEWIPC). What should happen? We 
> > cannot destroy the undo structure, the other thread might be still 
> > interested in it.
> > If we allow sys_unshare() for multithreaded processes with CLONE_NEWIPC 
> > and without CLONE_SYSVSEM, then we must handle this case.
> 
> Hm... I'd simply disable creating any new namespaces for threads.
> I think other namespaces developers agree with me. Serge, Suka, Eric
> what do you think?

Absolutely.

-serge

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC, PATCH] fix SEM_UNDO with namespaces
  2008-04-01  9:44     ` Pavel Emelyanov
  2008-04-01 14:15       ` Serge E. Hallyn
@ 2008-04-01 15:25       ` Eric W. Biederman
  2008-04-03 19:40         ` Manfred Spraul
  2008-04-03 19:44         ` Serge E. Hallyn
  1 sibling, 2 replies; 17+ messages in thread
From: Eric W. Biederman @ 2008-04-01 15:25 UTC (permalink / raw)
  To: Pavel Emelyanov
  Cc: Manfred Spraul, Linux Kernel Mailing List, Andrew Morton,
	Serge Hallyn, Sukadev Bhattiprolu, Eric W. Biederman

Pavel Emelyanov <xemul@openvz.org> writes:
>>> I agree, that we should probably destroy this one when the task calls 
>>> unshare, but trying to keep this list relevant is useless.
>>>   
>> A very tricky question: Let's assume we have a process with two threads.
>> The undo structure is shared, as per opengroup standard.
>> Now one thread calls unshare(CLONE_NEWIPC). What should happen? We 
>> cannot destroy the undo structure, the other thread might be still 
>> interested in it.
>> If we allow sys_unshare() for multithreaded processes with CLONE_NEWIPC 
>> and without CLONE_SYSVSEM, then we must handle this case.
>
> Hm... I'd simply disable creating any new namespaces for threads.
> I think other namespaces developers agree with me. Serge, Suka, Eric
> what do you think?

I almost agree.  sys_unshare() in a multithreaded process breaks
all kinds of user space libs.  So you can only reasonably look at
the problem as what we do with linux tasks that share some things
and not others.  The posix/opengroup notion of processes and threads
are a distraction.

In this case requiring it appears that to require unsharing both
CLONE_SYSVSEM and CLONE_NEWIPC at the same time.  (i.e. unshare
of CLONE_SYSVSEM should fail if CLONE_NEWIPC is not also specified).

Then to make it work we make unshare of SYSVSEM succeed when it is
not shared.

This looks like about a 5 line patch or two.

The effect is because we don't support unsharing of SYSVSEM currently
we don't support a threaded process unsharing the ipc namespace.

Eric

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC, PATCH] fix SEM_UNDO with namespaces
  2008-04-01 14:15       ` Serge E. Hallyn
@ 2008-04-03 19:04         ` Andrew Morton
  2008-04-03 19:31           ` Manfred Spraul
  0 siblings, 1 reply; 17+ messages in thread
From: Andrew Morton @ 2008-04-03 19:04 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: xemul, manfred, linux-kernel, serue, sukadev, ebiederm,
	Nadia Derbey, Pierre Peiffer

On Tue, 1 Apr 2008 09:15:41 -0500
"Serge E. Hallyn" <serue@us.ibm.com> wrote:

> Quoting Pavel Emelyanov (xemul@openvz.org):
> > Manfred Spraul wrote:
> > > Pavel Emelyanov wrote:
> > >> Manfred Spraul wrote:
> > >>   
> > >>> Hi,
> > >>>
> > >>> the attached patch should fix the combination of CLONE_NEWIPC with 
> > >>> shared sysv undo structures (the common case, just 
> > >>> sys_unshare(CLONE_NEWIPC)):
> > >>> lookup_undo() now locates the undo array based on both semid and the 
> > >>> namespace pointer.
> > >>>     
> > >> If you start using any IPC object and then call unshare with CLONE_NEWIPC,
> > >> then it's your problem, but not the kernel.
> > >>   
> > > The result is a kernel memory corruption, and kernel memory corruptions 
> > > are always the kernel's problem.
> > 
> > Agree. Must be fixed, but I'm not sure we should try handling this
> > case by trying to de-op semaphores for former task namespace. I think
> > that destroying this list or returning -EBUSY for this case is OK.
> > 
> > > The code assumed that a semaphore id is globally unique. With 
> > > namespaces,  this is not true anymore.
> > > If two semaphore arrays exist with the same id, but different sizes, 
> > > then semops will cause memory corruptions: The undo structure contains 
> > > one element for each semaphore, thus the semop will write behind the end 
> > > of the memory allocation.
> > > 
> > >> I agree, that we should probably destroy this one when the task calls 
> > >> unshare, but trying to keep this list relevant is useless.
> > >>   
> > > A very tricky question: Let's assume we have a process with two threads.
> > > The undo structure is shared, as per opengroup standard.
> > > Now one thread calls unshare(CLONE_NEWIPC). What should happen? We 
> > > cannot destroy the undo structure, the other thread might be still 
> > > interested in it.
> > > If we allow sys_unshare() for multithreaded processes with CLONE_NEWIPC 
> > > and without CLONE_SYSVSEM, then we must handle this case.
> > 
> > Hm... I'd simply disable creating any new namespaces for threads.
> > I think other namespaces developers agree with me. Serge, Suka, Eric
> > what do you think?
> 
> Absolutely.
> 

Guys, what's the status here?

afaict Manfred has identified an available-to-unprivileged-apps kernel
memory corrupter?  If so, we should fix it asap for 2.6.25.  And for
2.6.24.x if it's also present there.

Manfred's patch doesn't come close to applying against the 2.6.26 IPC
things which we have queued but that's OK - bugfixes come first.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC, PATCH] fix SEM_UNDO with namespaces
  2008-04-03 19:04         ` Andrew Morton
@ 2008-04-03 19:31           ` Manfred Spraul
  0 siblings, 0 replies; 17+ messages in thread
From: Manfred Spraul @ 2008-04-03 19:31 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Serge E. Hallyn, xemul, linux-kernel, sukadev, ebiederm,
	Nadia Derbey, Pierre Peiffer

Andrew Morton wrote:
>>
>> Absolutely.
>>
>>     
>
> Guys, what's the status here?
>
> afaict Manfred has identified an available-to-unprivileged-apps kernel
> memory corrupter?  If so, we should fix it asap for 2.6.25.  And for
> 2.6.24.x if it's also present there.
>
>   
No, it's a priveledged-only memory corruption:
> int unshare_nsproxy_namespaces(unsigned long unshare_flags,
>                 struct nsproxy **new_nsp, struct fs_struct *new_fs)
> {
>         int err = 0;
> [snip]
>         if (!capable(CAP_SYS_ADMIN))
>                 return -EPERM;

> Manfred's patch doesn't come close to applying against the 2.6.26 IPC
> things which we have queued but that's OK - bugfixes come first.
>   
Where can I find the queued changes? Are they in -mm?

--
    Manfred

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC, PATCH] fix SEM_UNDO with namespaces
  2008-04-01 15:25       ` Eric W. Biederman
@ 2008-04-03 19:40         ` Manfred Spraul
  2008-04-03 19:44         ` Serge E. Hallyn
  1 sibling, 0 replies; 17+ messages in thread
From: Manfred Spraul @ 2008-04-03 19:40 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Pavel Emelyanov, Linux Kernel Mailing List, Andrew Morton,
	Serge Hallyn, Sukadev Bhattiprolu

Eric W. Biederman wrote:
> Pavel Emelyanov <xemul@openvz.org> writes:
>   
>>>> I agree, that we should probably destroy this one when the task calls 
>>>> unshare, but trying to keep this list relevant is useless.
>>>>   
>>>>         
>>> A very tricky question: Let's assume we have a process with two threads.
>>> The undo structure is shared, as per opengroup standard.
>>> Now one thread calls unshare(CLONE_NEWIPC). What should happen? We 
>>> cannot destroy the undo structure, the other thread might be still 
>>> interested in it.
>>> If we allow sys_unshare() for multithreaded processes with CLONE_NEWIPC 
>>> and without CLONE_SYSVSEM, then we must handle this case.
>>>       
>> Hm... I'd simply disable creating any new namespaces for threads.
>> I think other namespaces developers agree with me. Serge, Suka, Eric
>> what do you think?
>>     
>
> I almost agree.  sys_unshare() in a multithreaded process breaks
> all kinds of user space libs.  So you can only reasonably look at
> the problem as what we do with linux tasks that share some things
> and not others.  The posix/opengroup notion of processes and threads
> are a distraction.
>
> In this case requiring it appears that to require unsharing both
> CLONE_SYSVSEM and CLONE_NEWIPC at the same time.  (i.e. unshare
> of CLONE_SYSVSEM should fail if CLONE_NEWIPC is not also specified).
> Then to make it work we make unshare of SYSVSEM succeed when it is not shared.
>
> This looks like about a 5 line patch or two.
>
>   
Probably something like this in copy_ipcs:
       if (current->sysvsem.undo_list != NULL && 
atomic_read(current->sysvsem.undo_list->refcount) > 1)
          return -EINVAL;

I'll think about it and write a patch over the weekend.

> The effect is because we don't support unsharing of SYSVSEM currently
> we don't support a threaded process unsharing the ipc namespace.
>   
I agree.
Btw: The manpage of unshare() is IMHO a bit misleading:
 >   NAME
 >       unshare - disassociate parts of the process execution context

unshare() only operates on a single thread, shouldn't the description be 
"disassociate parts of the thread execution context"?

--
    Manfred

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC, PATCH] fix SEM_UNDO with namespaces
  2008-04-01 15:25       ` Eric W. Biederman
  2008-04-03 19:40         ` Manfred Spraul
@ 2008-04-03 19:44         ` Serge E. Hallyn
  2008-04-04  4:39           ` Serge E. Hallyn
  1 sibling, 1 reply; 17+ messages in thread
From: Serge E. Hallyn @ 2008-04-03 19:44 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Pavel Emelyanov, Manfred Spraul, Linux Kernel Mailing List,
	Andrew Morton, Serge Hallyn, Sukadev Bhattiprolu

Quoting Eric W. Biederman (ebiederm@xmission.com):
> Pavel Emelyanov <xemul@openvz.org> writes:
> >>> I agree, that we should probably destroy this one when the task calls 
> >>> unshare, but trying to keep this list relevant is useless.
> >>>   
> >> A very tricky question: Let's assume we have a process with two threads.
> >> The undo structure is shared, as per opengroup standard.
> >> Now one thread calls unshare(CLONE_NEWIPC). What should happen? We 
> >> cannot destroy the undo structure, the other thread might be still 
> >> interested in it.
> >> If we allow sys_unshare() for multithreaded processes with CLONE_NEWIPC 
> >> and without CLONE_SYSVSEM, then we must handle this case.
> >
> > Hm... I'd simply disable creating any new namespaces for threads.
> > I think other namespaces developers agree with me. Serge, Suka, Eric
> > what do you think?
> 
> I almost agree.  sys_unshare() in a multithreaded process breaks
> all kinds of user space libs.  So you can only reasonably look at
> the problem as what we do with linux tasks that share some things
> and not others.  The posix/opengroup notion of processes and threads
> are a distraction.
> 
> In this case requiring it appears that to require unsharing both
> CLONE_SYSVSEM and CLONE_NEWIPC at the same time.  (i.e. unshare
> of CLONE_SYSVSEM should fail if CLONE_NEWIPC is not also specified).
> 
> Then to make it work we make unshare of SYSVSEM succeed when it is
> not shared.
> 
> This looks like about a 5 line patch or two.
> 
> The effect is because we don't support unsharing of SYSVSEM currently
> we don't support a threaded process unsharing the ipc namespace.
> 
> Eric

Eric, does the following patch correctly interpret your recommendation?

Pavel does it make sense to you?

thanks,
-serge

>From 9c85fb3cb80cea1d888c3c253a9fb6e9bc173649 Mon Sep 17 00:00:00 2001
From: Serge E. Hallyn <serue@us.ibm.com>
Date: Thu, 3 Apr 2008 12:43:23 -0700
Subject: [PATCH 1/1] ipc namespaces: fix svsem unsharing issue

Refuse to unshare an ipcns if the semundo is shared and we
are not requesting a new SYSVSEM

Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
---
 ipc/namespace.c |   10 ++++++++++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/ipc/namespace.c b/ipc/namespace.c
index 9171d94..9044505 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -48,6 +48,16 @@ struct ipc_namespace *copy_ipcs(unsigned long flags, struct ipc_namespace *ns)
 	if (!(flags & CLONE_NEWIPC))
 		return ns;
 
+	if (!(flags & CLONE_SYSVSEM)) {
+		if (!current->sysvsem.undo_list)
+			goto ok;
+		if (atomic_read(&current->sysvsem.undo_list->refcnt) == 1)
+			goto ok;
+		put_ipc_ns(ns);
+		return ERR_PTR(-EINVAL);
+	}
+
+ok:
 	new_ns = clone_ipc_ns(ns);
 
 	put_ipc_ns(ns);
-- 
1.5.3.6


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [RFC, PATCH] fix SEM_UNDO with namespaces
  2008-04-03 19:44         ` Serge E. Hallyn
@ 2008-04-04  4:39           ` Serge E. Hallyn
  2008-04-06 15:11             ` Manfred Spraul
  0 siblings, 1 reply; 17+ messages in thread
From: Serge E. Hallyn @ 2008-04-04  4:39 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: Eric W. Biederman, Pavel Emelyanov, Manfred Spraul,
	Linux Kernel Mailing List, Andrew Morton, Sukadev Bhattiprolu

Quoting Serge E. Hallyn (serue@us.ibm.com):
> Quoting Eric W. Biederman (ebiederm@xmission.com):
> > Pavel Emelyanov <xemul@openvz.org> writes:
> > >>> I agree, that we should probably destroy this one when the task calls 
> > >>> unshare, but trying to keep this list relevant is useless.
> > >>>   
> > >> A very tricky question: Let's assume we have a process with two threads.
> > >> The undo structure is shared, as per opengroup standard.
> > >> Now one thread calls unshare(CLONE_NEWIPC). What should happen? We 
> > >> cannot destroy the undo structure, the other thread might be still 
> > >> interested in it.
> > >> If we allow sys_unshare() for multithreaded processes with CLONE_NEWIPC 
> > >> and without CLONE_SYSVSEM, then we must handle this case.
> > >
> > > Hm... I'd simply disable creating any new namespaces for threads.
> > > I think other namespaces developers agree with me. Serge, Suka, Eric
> > > what do you think?
> > 
> > I almost agree.  sys_unshare() in a multithreaded process breaks
> > all kinds of user space libs.  So you can only reasonably look at
> > the problem as what we do with linux tasks that share some things
> > and not others.  The posix/opengroup notion of processes and threads
> > are a distraction.
> > 
> > In this case requiring it appears that to require unsharing both
> > CLONE_SYSVSEM and CLONE_NEWIPC at the same time.  (i.e. unshare
> > of CLONE_SYSVSEM should fail if CLONE_NEWIPC is not also specified).
> > 
> > Then to make it work we make unshare of SYSVSEM succeed when it is
> > not shared.
> > 
> > This looks like about a 5 line patch or two.
> > 
> > The effect is because we don't support unsharing of SYSVSEM currently
> > we don't support a threaded process unsharing the ipc namespace.
> > 
> > Eric
> 
> Eric, does the following patch correctly interpret your recommendation?
> 
> Pavel does it make sense to you?
> 
> thanks,
> -serge
> 
> >From 9c85fb3cb80cea1d888c3c253a9fb6e9bc173649 Mon Sep 17 00:00:00 2001
> From: Serge E. Hallyn <serue@us.ibm.com>
> Date: Thu, 3 Apr 2008 12:43:23 -0700
> Subject: [PATCH 1/1] ipc namespaces: fix svsem unsharing issue
> 
> Refuse to unshare an ipcns if the semundo is shared and we
> are not requesting a new SYSVSEM
> 
> Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
> ---
>  ipc/namespace.c |   10 ++++++++++
>  1 files changed, 10 insertions(+), 0 deletions(-)
> 
> diff --git a/ipc/namespace.c b/ipc/namespace.c
> index 9171d94..9044505 100644
> --- a/ipc/namespace.c
> +++ b/ipc/namespace.c
> @@ -48,6 +48,16 @@ struct ipc_namespace *copy_ipcs(unsigned long flags, struct ipc_namespace *ns)
>  	if (!(flags & CLONE_NEWIPC))
>  		return ns;
> 
> +	if (!(flags & CLONE_SYSVSEM)) {

Wait, this should be opposite, right?

> +		if (!current->sysvsem.undo_list)
> +			goto ok;
> +		if (atomic_read(&current->sysvsem.undo_list->refcnt) == 1)

And the refcnt check really isn't needed, right?  Either undo_list
exists and both parent and child have it, or it doesn't and we're fine.
So I suspect I can remove that.

Manfred, I'm trying to test this, but can't get an error without this
patch.  Do you have a testcase?  (I've tried some combinations of
creating a semarray in the new ipcns, creating one with fewer elements
than the index on which the undo operation was done, and not creating
a semarray in the new ipcsns at all, but the current code seems to
do fine.

Or maybe I fundamentally misunderstand the bug.

> +			goto ok;
> +		put_ipc_ns(ns);
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +ok:
>  	new_ns = clone_ipc_ns(ns);
> 
>  	put_ipc_ns(ns);
> -- 
> 1.5.3.6

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC, PATCH] fix SEM_UNDO with namespaces
  2008-04-04  4:39           ` Serge E. Hallyn
@ 2008-04-06 15:11             ` Manfred Spraul
  2008-04-06 16:26               ` [PATCH] fix SEM_UNDO with namespaces, take 2 Manfred Spraul
  2008-04-14 21:10               ` [RFC, PATCH] fix SEM_UNDO with namespaces Serge E. Hallyn
  0 siblings, 2 replies; 17+ messages in thread
From: Manfred Spraul @ 2008-04-06 15:11 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: Eric W. Biederman, Pavel Emelyanov, Linux Kernel Mailing List,
	Andrew Morton, Sukadev Bhattiprolu

[-- Attachment #1: Type: text/plain, Size: 1408 bytes --]

Serge E. Hallyn wrote:
>> diff --git a/ipc/namespace.c b/ipc/namespace.c
>> index 9171d94..9044505 100644
>> --- a/ipc/namespace.c
>> +++ b/ipc/namespace.c
>> @@ -48,6 +48,16 @@ struct ipc_namespace *copy_ipcs(unsigned long flags, struct ipc_namespace *ns)
>>  	if (!(flags & CLONE_NEWIPC))
>>  		return ns;
>>
>> +	if (!(flags & CLONE_SYSVSEM)) {
>>     
>
> Wait, this should be opposite, right?
>
> [snip]
> Manfred, I'm trying to test this, but can't get an error without this
> patch.  Do you have a testcase?

The patch is bogus, sorry that I didn't notice it immediately.
The problem is ipc/sem.c, function find_undo, lookup_undo:
lookup_undo doesn't check the namespace pointer, thus a simple single 
threaded app can trigger the problem.

Attached is a test app and a kernel patch that shows the problem.
Run the test app immediately after boot (or within a new ipc namespace), 
otherwise the ipc sequence counter will prevent the app from triggering 
the problem: The undo structure that was created before unshare() [i.e. 
with 1 semaphore in it] will be used after unshare() [i.e. semaphore 100 
will be accessed].

With kernel debugging (full slub debugging) enabled, I even got an oops 
when I tried to ipcrm the left over array after running undons, probably 
because the undo structure was freed at exit_sem() within the new 
namespace, but still used in the outer namespace.

--
    Manfred

[-- Attachment #2: undons.c --]
[-- Type: text/plain, Size: 3350 bytes --]

/*
 * Copyright (C) 1999,2001 by Manfred Spraul.
 * 
 * Redistribution of this file is permitted under the terms of the GNU 
 * General Public License (GPL)
 * $Header: /home/manfred/cvs-tree/manfred/ipcsem/undotest.c,v 1.2 2003/06/28 15:19:43 manfred Exp $
 */

#ifdef __LINUX__
	#define _GNU_SOURCE
	#include <sched.h>
	#include <wait.h>
	
	#define CLONE_NEWIPC		0x08000000	/* New ipcs */
#endif
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/sem.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <assert.h>
#include <signal.h>
#include <time.h>
#include <unistd.h>
#include <pthread.h>

union semun {
	int val;
	struct semid_ds *buf;
	unsigned short int *array;
	struct seminfo* __buf;
};

int getval(char * str, int id)
{
	union semun arg;
	int res;

	res = semctl(id,0,GETVAL,arg);
	if(res==-1) {
		printf("GETVAL failed for %s.\n", str);
		exit(4);
	}
	printf("   %s: GETVAL now %d.\n",str, res);
	return res;
}

void setzero(int id)
{
	union semun arg;
	int res;

	arg.val = 0;
	res = semctl(id,0,SETVAL,arg);
	if(res==-1) {
		printf("SETVAL failed, errno %d.\n", errno);
		exit(4);
	}
	printf("   SETVAL succeeded.\n");
}


/* test1: verify that undo works at all. */
int main(int argc,char** argv)
{
	int res, id;

	printf("  ****************************************\n");

	/* create array */
	res = semget(IPC_PRIVATE, 1, 0700 | IPC_CREAT);
	printf(" got semaphore array %xh.\n",res);
	if(res == -1) {
		printf(" create failed.\n");
		return 1;
	}
	id = res;

	setzero(id);

	res = getval("test1 init", id);
	if (res != 0) {
		printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\n");
		printf("Bad: got unexpected value.\n");
		exit(99);
	}

	/* create sub-process */
	res = fork();
	if (res < 0) {
		printf("Fork failed (errno=%d). Aborting.\n", errno);
		res = semctl(id,1,IPC_RMID,NULL);
		exit(1);
	}
	fflush(stdout);
	if (!res) {
		struct sembuf sop[1];
		int id2;

		sop[0].sem_num=0;
		sop[0].sem_op=1;
		sop[0].sem_flg=SEM_UNDO;

		res = semop(id,sop,1);
		if(res==-1) {
			printf("semop failed.\n");
			exit(1);
		}
		res = getval("before unshare()", id);
		if (res != 1) {
			printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\n");
			printf("Bad: got unexpected value.\n");
			exit(99);
		}
		fflush(stdout);

		errno = 0;
		res = unshare(CLONE_NEWNS|CLONE_NEWIPC);
		printf("  unshare returned %d, errno now %d (expected: 0,0).\n", res, errno);
		fflush(stdout);

		/* create array */
		res = semget(IPC_PRIVATE, 101, 0700 | IPC_CREAT);
		printf("  got semaphore array %xh.\n",res);
		if(res == -1) {
			printf(" create failed.\n");
			return 1;
		}
		id2 = res;
		printf("Now operating on id1=%x, id2=%x. (if both are equal, there will be a memory corruption)\n", id, id2);

		/* child: */
		sop[0].sem_num=100;
		sop[0].sem_op=1;
		sop[0].sem_flg=SEM_UNDO;

		// This operation should trash kernel memory  (if id1==id2)
		res = semop(id2,sop,1);
		if(res==-1) {
			printf("semop failed.\n");
			exit(1);
		}
		exit(1);
	} else {
		int retval;
		retval = wait4(res, NULL, 0, NULL);
		if (retval != res) {
			printf("wait4 returned unexpeted value %d, errno now %d.\n", 
					retval, errno);
		}
		res = getval("after exit", id);
		if (res != 0) {
			printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\n");
			printf("Bad: got unexpected value.\n");
			return 1;
		}
	}

	return 0;
}

[-- Attachment #3: patch-help --]
[-- Type: text/plain, Size: 636 bytes --]

diff --git a/ipc/sem.c b/ipc/sem.c
index 0b45a4d..5c882f9 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1081,6 +1082,7 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
 	new->id_next = sma->undo;
 	sma->undo = new;
 	sem_unlock(sma);
+printk(KERN_ERR "find_undo(%p, %x) new undop %p (for %d sems).\n", ns, semid, new, nsems);
 	un = new;
 	spin_unlock(&ulp->lock);
 out:
@@ -1153,6 +1155,7 @@ retry_undos:
 		error = PTR_ERR(sma);
 		goto out_free;
 	}
+printk(KERN_ERR "find_undo(%p, %x) == %p, using %ld sems.\n", ns, semid, un, sma->sem_nsems);
 
 	/*
 	 * semid identifiers are not unique - find_undo may have

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH] fix SEM_UNDO with namespaces, take 2
  2008-04-06 15:11             ` Manfred Spraul
@ 2008-04-06 16:26               ` Manfred Spraul
  2008-04-07  7:21                 ` Pavel Emelyanov
  2008-04-14 21:10               ` [RFC, PATCH] fix SEM_UNDO with namespaces Serge E. Hallyn
  1 sibling, 1 reply; 17+ messages in thread
From: Manfred Spraul @ 2008-04-06 16:26 UTC (permalink / raw)
  To: Linux Kernel Mailing List
  Cc: Serge E. Hallyn, Eric W. Biederman, Pavel Emelyanov,
	Andrew Morton, Sukadev Bhattiprolu

[-- Attachment #1: Type: text/plain, Size: 443 bytes --]

Hi,

below is the second attempt to fix SEM_UNDO + unshare():
lookup_undo (in ipc/sem.c) is not namespace-aware, thus all entries in 
sysvsem.undo_list must be from the same namespace.
The patch enforces that by detaching the current thread from 
sysvsem.undo_list in switch_task_namespaces() if the ipc namespace is 
changed.

The patch boots and passes simple sysvsem+unshare tests.

Signed-Off-By: Manfred Spraul <manfred@colorfullife.com>

[-- Attachment #2: patch-namespace-detach --]
[-- Type: text/plain, Size: 1128 bytes --]

diff --git a/ipc/sem.c b/ipc/sem.c
index 0b45a4d..35841bd 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1298,6 +1298,7 @@ void exit_sem(struct task_struct *tsk)
 	undo_list = tsk->sysvsem.undo_list;
 	if (!undo_list)
 		return;
+	tsk->sysvsem.undo_list = NULL;
 
 	if (!atomic_dec_and_test(&undo_list->refcnt))
 		return;
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index f5d332c..ddeb9d1 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -211,6 +211,18 @@ void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
 
 	might_sleep();
 
+	if ((p->nsproxy == NULL && new != NULL) ||
+		(p->nsproxy != NULL && new == NULL) ||
+		(p->nsproxy != NULL && new != NULL && p->nsproxy->ipc_ns != new->ipc_ns)) {
+		/* switching the IPC namespace is considered equivalent to sys_exit() wrt.
+		 * to outstanding SEM_UNDO undos: After switching to the new IPC namespace,
+		 * the semaphore arrays from the old namespace are not accessible anymore.
+		 * 
+		 * Additionally, an implicit sys_unshare(CLONE_SYSVSEM) is performed.
+		 */
+		exit_sem(p);
+	}
+
 	ns = p->nsproxy;
 
 	rcu_assign_pointer(p->nsproxy, new);

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH] fix SEM_UNDO with namespaces, take 2
  2008-04-06 16:26               ` [PATCH] fix SEM_UNDO with namespaces, take 2 Manfred Spraul
@ 2008-04-07  7:21                 ` Pavel Emelyanov
  2008-04-07 17:03                   ` Manfred Spraul
  0 siblings, 1 reply; 17+ messages in thread
From: Pavel Emelyanov @ 2008-04-07  7:21 UTC (permalink / raw)
  To: Manfred Spraul
  Cc: Linux Kernel Mailing List, Serge E. Hallyn, Eric W. Biederman,
	Andrew Morton, Sukadev Bhattiprolu

Manfred Spraul wrote:
> Hi,
> 
> below is the second attempt to fix SEM_UNDO + unshare():
> lookup_undo (in ipc/sem.c) is not namespace-aware, thus all entries in 
> sysvsem.undo_list must be from the same namespace.
> The patch enforces that by detaching the current thread from 
> sysvsem.undo_list in switch_task_namespaces() if the ipc namespace is 
> changed.
> 
> The patch boots and passes simple sysvsem+unshare tests.
> 
> Signed-Off-By: Manfred Spraul <manfred@colorfullife.com>
> 
> @@ -211,6 +211,18 @@ void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
>  
>  	might_sleep();
>  
> +	if ((p->nsproxy == NULL && new != NULL) ||
> +		(p->nsproxy != NULL && new == NULL) ||
> +		(p->nsproxy != NULL && new != NULL && p->nsproxy->ipc_ns != new->ipc_ns)) {
> +		/* switching the IPC namespace is considered equivalent to sys_exit() wrt.
> +		 * to outstanding SEM_UNDO undos: After switching to the new IPC namespace,
> +		 * the semaphore arrays from the old namespace are not accessible anymore.
> +		 * 
> +		 * Additionally, an implicit sys_unshare(CLONE_SYSVSEM) is performed.
> +		 */
> +		exit_sem(p);
> +	}
> +

No, switch_task_namespaces is the wrong place to do this. It is to be 
done in copy_ipc_ns. If you need a task for which a new namespace is 
being prepared, then pass one into it.

>  	ns = p->nsproxy;
>  
>  	rcu_assign_pointer(p->nsproxy, new);


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] fix SEM_UNDO with namespaces, take 2
  2008-04-07  7:21                 ` Pavel Emelyanov
@ 2008-04-07 17:03                   ` Manfred Spraul
  2008-04-08  8:09                     ` Pavel Emelyanov
  0 siblings, 1 reply; 17+ messages in thread
From: Manfred Spraul @ 2008-04-07 17:03 UTC (permalink / raw)
  To: Pavel Emelyanov
  Cc: Linux Kernel Mailing List, Serge E. Hallyn, Eric W. Biederman,
	Andrew Morton, Sukadev Bhattiprolu

[-- Attachment #1: Type: text/plain, Size: 817 bytes --]

Pavel Emelyanov wrote:
> No, switch_task_namespaces is the wrong place to do this. It is to be 
> done in copy_ipc_ns. If you need a task for which a new namespace is 
> being prepared, then pass one into it.
>
>   
copy_ipc_ns() is also the wrong place:
There are calls after copy_ipc_ns() that can fail (e.g. copy_pid_ns()), 
and if they fail, then the whole syscall is aborted.
But undoing outstanding semaphore operations cannot be undone.
Or simpler: the copy_whatever() functions do not modify current.

Another option would be within sys_unshare():
sys_unshare() first creates all new pointers, and then actual unsharing 
is performed.

What do you think? I that the right place? The implementation could be 
moved into a seperate function, perhaps some of the NULL tests are 
superflous, too.

--
    Manfred

[-- Attachment #2: patch-detach-in-sys_unshare --]
[-- Type: text/plain, Size: 1274 bytes --]

diff --git a/ipc/sem.c b/ipc/sem.c
index 0b45a4d..35841bd 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1298,6 +1298,7 @@ void exit_sem(struct task_struct *tsk)
 	undo_list = tsk->sysvsem.undo_list;
 	if (!undo_list)
 		return;
+	tsk->sysvsem.undo_list = NULL;
 
 	if (!atomic_dec_and_test(&undo_list->refcnt))
 		return;
diff --git a/kernel/fork.c b/kernel/fork.c
index 9c042f9..a3f3abb 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1733,6 +1733,18 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
 	if (new_fs ||  new_mm || new_fd || new_ulist || new_nsproxy) {
 
 		if (new_nsproxy) {
+			if ((current->nsproxy == NULL && new_nsproxy != NULL) ||
+				(current->nsproxy != NULL && new_nsproxy == NULL) ||
+				(current->nsproxy != NULL && new_nsproxy != NULL && current->nsproxy->ipc_ns != new_nsproxy->ipc_ns)) {
+				/* switching the IPC namespace is considered equivalent to sys_exit() wrt.
+				 * to outstanding SEM_UNDO undos: After switching to the new IPC namespace,
+				 * the semaphore arrays from the old namespace are not accessible anymore.
+				 * 
+				 * Additionally, an implicit sys_unshare(CLONE_SYSVSEM) is performed.
+				 */
+				exit_sem(current);
+			}
+
 			switch_task_namespaces(current, new_nsproxy);
 			new_nsproxy = NULL;
 		}

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH] fix SEM_UNDO with namespaces, take 2
  2008-04-07 17:03                   ` Manfred Spraul
@ 2008-04-08  8:09                     ` Pavel Emelyanov
  0 siblings, 0 replies; 17+ messages in thread
From: Pavel Emelyanov @ 2008-04-08  8:09 UTC (permalink / raw)
  To: Manfred Spraul
  Cc: Linux Kernel Mailing List, Serge E. Hallyn, Eric W. Biederman,
	Andrew Morton, Sukadev Bhattiprolu

> +			if ((current->nsproxy == NULL && new_nsproxy != NULL) ||
> +				(current->nsproxy != NULL && new_nsproxy == NULL) ||
> +				(current->nsproxy != NULL && new_nsproxy != NULL && current->nsproxy->ipc_ns != new_nsproxy->ipc_ns)) {

Is the if (unshare_flags & CLONE_NEWIPC) check not enough?

Thanks,
Pavel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC, PATCH] fix SEM_UNDO with namespaces
  2008-04-06 15:11             ` Manfred Spraul
  2008-04-06 16:26               ` [PATCH] fix SEM_UNDO with namespaces, take 2 Manfred Spraul
@ 2008-04-14 21:10               ` Serge E. Hallyn
  1 sibling, 0 replies; 17+ messages in thread
From: Serge E. Hallyn @ 2008-04-14 21:10 UTC (permalink / raw)
  To: Manfred Spraul
  Cc: Serge E. Hallyn, Eric W. Biederman, Pavel Emelyanov,
	Linux Kernel Mailing List, Andrew Morton, Sukadev Bhattiprolu

Quoting Manfred Spraul (manfred@colorfullife.com):
> Serge E. Hallyn wrote:
>>> diff --git a/ipc/namespace.c b/ipc/namespace.c
>>> index 9171d94..9044505 100644
>>> --- a/ipc/namespace.c
>>> +++ b/ipc/namespace.c
>>> @@ -48,6 +48,16 @@ struct ipc_namespace *copy_ipcs(unsigned long flags, 
>>> struct ipc_namespace *ns)
>>>  	if (!(flags & CLONE_NEWIPC))
>>>  		return ns;
>>>
>>> +	if (!(flags & CLONE_SYSVSEM)) {
>>>     
>>
>> Wait, this should be opposite, right?
>>
>> [snip]
>> Manfred, I'm trying to test this, but can't get an error without this
>> patch.  Do you have a testcase?
>
> The patch is bogus, sorry that I didn't notice it immediately.
> The problem is ipc/sem.c, function find_undo, lookup_undo:
> lookup_undo doesn't check the namespace pointer, thus a simple single 
> threaded app can trigger the problem.
>
> Attached is a test app and a kernel patch that shows the problem.
> Run the test app immediately after boot (or within a new ipc namespace), 

Thanks, Manfred.  Basically what I was trying to do, except this appears
to actually work :)

So now when I replace fork+unshare(CLONE_NEWNS|CLONE_NEWIPC) with
clone(CLONE_NEWNS|CLONE_NEWIPC|CLONE_SYSVSEM) (moving the first semop
to before the clone), I do in fact manage to get an oops with the mmotm
I fetched this morning.  So I maintain that you do also need to ensure at
copy_namespaces() that if CLONE_NEWIPC is specified, then CLONE_SYSVSEM
is *not* specified (since, this being clone, specifying it means that
the semundo is shared with the parent).

thanks,
-serge

> otherwise the ipc sequence counter will prevent the app from triggering the 
> problem: The undo structure that was created before unshare() [i.e. with 1 
> semaphore in it] will be used after unshare() [i.e. semaphore 100 will be 
> accessed].
>
> With kernel debugging (full slub debugging) enabled, I even got an oops 
> when I tried to ipcrm the left over array after running undons, probably 
> because the undo structure was freed at exit_sem() within the new 
> namespace, but still used in the outer namespace.
>
> --
>    Manfred

> /*
>  * Copyright (C) 1999,2001 by Manfred Spraul.
>  * 
>  * Redistribution of this file is permitted under the terms of the GNU 
>  * General Public License (GPL)
>  * $Header: /home/manfred/cvs-tree/manfred/ipcsem/undotest.c,v 1.2 2003/06/28 15:19:43 manfred Exp $
>  */
> 
> #ifdef __LINUX__
> 	#define _GNU_SOURCE
> 	#include <sched.h>
> 	#include <wait.h>
> 	
> 	#define CLONE_NEWIPC		0x08000000	/* New ipcs */
> #endif
> #include <sys/types.h>
> #include <sys/ipc.h>
> #include <sys/sem.h>
> #include <stdlib.h>
> #include <stdio.h>
> #include <errno.h>
> #include <string.h>
> #include <assert.h>
> #include <signal.h>
> #include <time.h>
> #include <unistd.h>
> #include <pthread.h>
> 
> union semun {
> 	int val;
> 	struct semid_ds *buf;
> 	unsigned short int *array;
> 	struct seminfo* __buf;
> };
> 
> int getval(char * str, int id)
> {
> 	union semun arg;
> 	int res;
> 
> 	res = semctl(id,0,GETVAL,arg);
> 	if(res==-1) {
> 		printf("GETVAL failed for %s.\n", str);
> 		exit(4);
> 	}
> 	printf("   %s: GETVAL now %d.\n",str, res);
> 	return res;
> }
> 
> void setzero(int id)
> {
> 	union semun arg;
> 	int res;
> 
> 	arg.val = 0;
> 	res = semctl(id,0,SETVAL,arg);
> 	if(res==-1) {
> 		printf("SETVAL failed, errno %d.\n", errno);
> 		exit(4);
> 	}
> 	printf("   SETVAL succeeded.\n");
> }
> 
> 
> /* test1: verify that undo works at all. */
> int main(int argc,char** argv)
> {
> 	int res, id;
> 
> 	printf("  ****************************************\n");
> 
> 	/* create array */
> 	res = semget(IPC_PRIVATE, 1, 0700 | IPC_CREAT);
> 	printf(" got semaphore array %xh.\n",res);
> 	if(res == -1) {
> 		printf(" create failed.\n");
> 		return 1;
> 	}
> 	id = res;
> 
> 	setzero(id);
> 
> 	res = getval("test1 init", id);
> 	if (res != 0) {
> 		printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\n");
> 		printf("Bad: got unexpected value.\n");
> 		exit(99);
> 	}
> 
> 	/* create sub-process */
> 	res = fork();
> 	if (res < 0) {
> 		printf("Fork failed (errno=%d). Aborting.\n", errno);
> 		res = semctl(id,1,IPC_RMID,NULL);
> 		exit(1);
> 	}
> 	fflush(stdout);
> 	if (!res) {
> 		struct sembuf sop[1];
> 		int id2;
> 
> 		sop[0].sem_num=0;
> 		sop[0].sem_op=1;
> 		sop[0].sem_flg=SEM_UNDO;
> 
> 		res = semop(id,sop,1);
> 		if(res==-1) {
> 			printf("semop failed.\n");
> 			exit(1);
> 		}
> 		res = getval("before unshare()", id);
> 		if (res != 1) {
> 			printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\n");
> 			printf("Bad: got unexpected value.\n");
> 			exit(99);
> 		}
> 		fflush(stdout);
> 
> 		errno = 0;
> 		res = unshare(CLONE_NEWNS|CLONE_NEWIPC);
> 		printf("  unshare returned %d, errno now %d (expected: 0,0).\n", res, errno);
> 		fflush(stdout);
> 
> 		/* create array */
> 		res = semget(IPC_PRIVATE, 101, 0700 | IPC_CREAT);
> 		printf("  got semaphore array %xh.\n",res);
> 		if(res == -1) {
> 			printf(" create failed.\n");
> 			return 1;
> 		}
> 		id2 = res;
> 		printf("Now operating on id1=%x, id2=%x. (if both are equal, there will be a memory corruption)\n", id, id2);
> 
> 		/* child: */
> 		sop[0].sem_num=100;
> 		sop[0].sem_op=1;
> 		sop[0].sem_flg=SEM_UNDO;
> 
> 		// This operation should trash kernel memory  (if id1==id2)
> 		res = semop(id2,sop,1);
> 		if(res==-1) {
> 			printf("semop failed.\n");
> 			exit(1);
> 		}
> 		exit(1);
> 	} else {
> 		int retval;
> 		retval = wait4(res, NULL, 0, NULL);
> 		if (retval != res) {
> 			printf("wait4 returned unexpeted value %d, errno now %d.\n", 
> 					retval, errno);
> 		}
> 		res = getval("after exit", id);
> 		if (res != 0) {
> 			printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\n");
> 			printf("Bad: got unexpected value.\n");
> 			return 1;
> 		}
> 	}
> 
> 	return 0;
> }

> diff --git a/ipc/sem.c b/ipc/sem.c
> index 0b45a4d..5c882f9 100644
> --- a/ipc/sem.c
> +++ b/ipc/sem.c
> @@ -1081,6 +1082,7 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
>  	new->id_next = sma->undo;
>  	sma->undo = new;
>  	sem_unlock(sma);
> +printk(KERN_ERR "find_undo(%p, %x) new undop %p (for %d sems).\n", ns, semid, new, nsems);
>  	un = new;
>  	spin_unlock(&ulp->lock);
>  out:
> @@ -1153,6 +1155,7 @@ retry_undos:
>  		error = PTR_ERR(sma);
>  		goto out_free;
>  	}
> +printk(KERN_ERR "find_undo(%p, %x) == %p, using %ld sems.\n", ns, semid, un, sma->sem_nsems);
> 
>  	/*
>  	 * semid identifiers are not unique - find_undo may have


^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2008-04-14 21:10 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-03-30 20:50 [RFC, PATCH] fix SEM_UNDO with namespaces Manfred Spraul
2008-03-31  7:12 ` Pavel Emelyanov
2008-03-31 16:14   ` Manfred Spraul
2008-04-01  9:44     ` Pavel Emelyanov
2008-04-01 14:15       ` Serge E. Hallyn
2008-04-03 19:04         ` Andrew Morton
2008-04-03 19:31           ` Manfred Spraul
2008-04-01 15:25       ` Eric W. Biederman
2008-04-03 19:40         ` Manfred Spraul
2008-04-03 19:44         ` Serge E. Hallyn
2008-04-04  4:39           ` Serge E. Hallyn
2008-04-06 15:11             ` Manfred Spraul
2008-04-06 16:26               ` [PATCH] fix SEM_UNDO with namespaces, take 2 Manfred Spraul
2008-04-07  7:21                 ` Pavel Emelyanov
2008-04-07 17:03                   ` Manfred Spraul
2008-04-08  8:09                     ` Pavel Emelyanov
2008-04-14 21:10               ` [RFC, PATCH] fix SEM_UNDO with namespaces Serge E. Hallyn

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).