From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S933249AbYCFMgr (ORCPT ); Thu, 6 Mar 2008 07:36:47 -0500 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1758860AbYCFMgh (ORCPT ); Thu, 6 Mar 2008 07:36:37 -0500 Received: from ik-out-1112.google.com ([66.249.90.176]:8694 "EHLO ik-out-1112.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1756309AbYCFMgf convert rfc822-to-8bit (ORCPT ); Thu, 6 Mar 2008 07:36:35 -0500 DomainKey-Signature: a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=from:to:subject:date:user-agent:cc:references:in-reply-to:mime-version:content-type:content-transfer-encoding:content-disposition:message-id; b=StuzaKAOvjOUbfJAu4nMKiUrM0Gh5IEoDL+raFB2ERRL6/4+th/XyAXi8PCUP5HqMzsMyChmPnR/oTrlgj58kOA1M8e0shxADWPMtuc67kyIpnvC2iYvtnYSXg3ljqFGk7k3j1yrxcFwTxu4h4E+ZaIy3t4NOyJb+LAK+dhwrCw= From: Bastien ROUCARIES To: Suresh Siddha Subject: Re: [patch 2/2] x86, fpu: lazy allocation of FPU area - v4 Date: Thu, 6 Mar 2008 13:40:07 +0100 User-Agent: KMail/1.9.7 Cc: mingo@elte.hu, hpa@zytor.com, tglx@linutronix.de, andi@firstfloor.org, hch@infradead.org, linux-kernel@vger.kernel.org, Arjan van de Ven References: <20080304234003.603501000@linux-os.sc.intel.com> <20080304234003.752305000@linux-os.sc.intel.com> In-Reply-To: <20080304234003.752305000@linux-os.sc.intel.com> MIME-Version: 1.0 Content-Type: text/plain; charset="iso-8859-15" Content-Transfer-Encoding: 8BIT Content-Disposition: inline Message-Id: <200803061340.09904.roucaries.bastien@gmail.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Le mercredi 5 mars 2008, Suresh Siddha a écrit : > Only allocate the FPU area when the application actually uses FPU, i.e., in > the first lazy FPU trap. This could save memory for non-fpu using apps. > > Signed-off-by: Suresh Siddha > Cc: Arjan van de Ven > --- > v4: Handles the kmem_cache_alloc() failures. 
> v3: Fixed the non-atomic calling sequence in atomic context. > v2: Ported to x86.git#testing with some name changes. > --- > > Index: linux-2.6-x86/arch/x86/kernel/i387.c > =================================================================== > --- linux-2.6-x86.orig/arch/x86/kernel/i387.c 2008-03-04 15:31:56.000000000 > -0800 +++ linux-2.6-x86/arch/x86/kernel/i387.c 2008-03-04 > 15:31:57.000000000 -0800 @@ -9,7 +9,6 @@ > #include > #include > #include > -#include > #include > #include > #include > @@ -67,7 +66,6 @@ > else > xstate_size = sizeof(struct i387_fsave_struct); > #endif > - init_task.thread.xstate = alloc_bootmem(xstate_size); > } > > #ifdef CONFIG_X86_64 > @@ -97,12 +95,22 @@ > * value at reset if we support XMM instructions and then > * remeber the current task has used the FPU. > */ > -void init_fpu(struct task_struct *tsk) > +int init_fpu(struct task_struct *tsk) > { > if (tsk_used_math(tsk)) { > if (tsk == current) > unlazy_fpu(tsk); > - return; > + return 0; > + } > + > + /* > + * Memory allocation at the first usage of the FPU and other state. > + */ > + if (!tsk->thread.xstate) { > + tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep, > + GFP_KERNEL); > + if (!tsk->thread.xstate) > + return -ENOMEM; > } > > if (cpu_has_fxsr) { > @@ -124,6 +132,7 @@ > * Only the device not available exception or ptrace can call init_fpu. 
> */ > set_stopped_child_used_math(tsk); > + return 0; > } > > int fpregs_active(struct task_struct *target, const struct user_regset > *regset) @@ -140,10 +149,14 @@ > unsigned int pos, unsigned int count, > void *kbuf, void __user *ubuf) > { > + int ret; > + > if (!cpu_has_fxsr) > return -ENODEV; > > - init_fpu(target); > + ret = init_fpu(target); > + if (ret) > + return ret; > > return user_regset_copyout(&pos, &count, &kbuf, &ubuf, > &target->thread.xstate->fxsave, 0, -1); > @@ -158,7 +171,10 @@ > if (!cpu_has_fxsr) > return -ENODEV; > > - init_fpu(target); > + ret = init_fpu(target); > + if (ret) > + return ret; > + > set_stopped_child_used_math(target); > > ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, > @@ -314,11 +330,14 @@ > void *kbuf, void __user *ubuf) > { > struct user_i387_ia32_struct env; > + int ret; > > if (!HAVE_HWFP) > return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); > > - init_fpu(target); > + ret = init_fpu(target); > + if (ret) > + return ret; > > if (!cpu_has_fxsr) > return user_regset_copyout(&pos, &count, &kbuf, &ubuf, > @@ -343,7 +362,10 @@ > if (!HAVE_HWFP) > return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); > > - init_fpu(target); > + ret = init_fpu(target); > + if (ret) > + return ret; > + > set_stopped_child_used_math(target); > > if (!cpu_has_fxsr) > Index: linux-2.6-x86/arch/x86/kernel/process.c > =================================================================== > --- linux-2.6-x86.orig/arch/x86/kernel/process.c 2008-03-04 > 15:31:56.000000000 -0800 +++ > linux-2.6-x86/arch/x86/kernel/process.c 2008-03-04 15:32:23.000000000 -0800 > @@ -5,24 +5,34 @@ > #include > #include > > -static struct kmem_cache *task_xstate_cachep; > +struct kmem_cache *task_xstate_cachep; > > int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) > { > *dst = *src; > - dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); > - if (!dst->thread.xstate) > - return -ENOMEM; > - 
WARN_ON((unsigned long)dst->thread.xstate & 15); > - memcpy(dst->thread.xstate, src->thread.xstate, xstate_size); > + if (src->thread.xstate) { > + dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, > + GFP_KERNEL); > + if (!dst->thread.xstate) > + return -ENOMEM; > + WARN_ON((unsigned long)dst->thread.xstate & 15); > + memcpy(dst->thread.xstate, src->thread.xstate, xstate_size); > + } > return 0; > } > > -void free_thread_info(struct thread_info *ti) > +void free_thread_xstate(struct task_struct *tsk) > { > - kmem_cache_free(task_xstate_cachep, ti->task->thread.xstate); > - ti->task->thread.xstate = NULL; > + if (tsk->thread.xstate) { > + kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); > + tsk->thread.xstate = NULL; > + } > +} > + > > +void free_thread_info(struct thread_info *ti) > +{ > + free_thread_xstate(ti->task); > free_pages((unsigned long)(ti), get_order(THREAD_SIZE)); > } > > Index: linux-2.6-x86/include/asm-x86/processor.h > =================================================================== > --- linux-2.6-x86.orig/include/asm-x86/processor.h 2008-03-04 > 15:31:56.000000000 -0800 +++ > linux-2.6-x86/include/asm-x86/processor.h 2008-03-04 15:31:57.000000000 > -0800 @@ -354,6 +354,8 @@ > > extern void print_cpu_info(struct cpuinfo_x86 *); > extern unsigned int xstate_size; > +extern void free_thread_xstate(struct task_struct *); > +extern struct kmem_cache *task_xstate_cachep; > extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); > extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); > extern unsigned short num_cache_leaves; > Index: linux-2.6-x86/arch/x86/kernel/process_32.c > =================================================================== > --- linux-2.6-x86.orig/arch/x86/kernel/process_32.c 2008-03-04 > 15:31:56.000000000 -0800 +++ > linux-2.6-x86/arch/x86/kernel/process_32.c 2008-03-04 15:31:57.000000000 > -0800 @@ -524,6 +524,10 @@ > regs->cs = __USER_CS; > regs->ip = new_ip; > regs->sp = new_sp; > + /* > 
+ * Free the old FP and other extended state > + */ > + free_thread_xstate(current); > } > EXPORT_SYMBOL_GPL(start_thread); > > Index: linux-2.6-x86/arch/x86/kernel/process_64.c > =================================================================== > --- linux-2.6-x86.orig/arch/x86/kernel/process_64.c 2008-03-04 > 15:31:56.000000000 -0800 +++ > linux-2.6-x86/arch/x86/kernel/process_64.c 2008-03-04 15:31:57.000000000 > -0800 @@ -552,6 +552,10 @@ > regs->ss = __USER_DS; > regs->flags = 0x200; > set_fs(USER_DS); > + /* > + * Free the old FP and other extended state > + */ > + free_thread_xstate(current); > } > EXPORT_SYMBOL_GPL(start_thread); > > Index: linux-2.6-x86/arch/x86/kernel/traps_32.c > =================================================================== > --- linux-2.6-x86.orig/arch/x86/kernel/traps_32.c 2008-03-04 > 15:31:56.000000000 -0800 +++ > linux-2.6-x86/arch/x86/kernel/traps_32.c 2008-03-04 15:31:57.000000000 > -0800 @@ -1168,9 +1168,23 @@ > struct thread_info *thread = current_thread_info(); > struct task_struct *tsk = thread->task; > > + if (!tsk_used_math(tsk)) { > +#ifdef CONFIG_PREEMPT > + local_irq_enable(); > +#endif > + /* > + * does a slab alloc which can sleep > + */ > + if (init_fpu(tsk)) { > + force_sig(SIGSEGV, tsk); Maybe SIGBUS would be better here?
> + return; > + } > +#ifdef CONFIG_PREEMPT > + local_irq_disable(); > +#endif > + } > + > clts(); /* Allow maths ops (or we recurse) */ > - if (!tsk_used_math(tsk)) > - init_fpu(tsk); > restore_fpu(tsk); > thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ > tsk->fpu_counter++; > Index: linux-2.6-x86/arch/x86/kernel/traps_64.c > =================================================================== > --- linux-2.6-x86.orig/arch/x86/kernel/traps_64.c 2008-03-04 > 15:31:56.000000000 -0800 +++ > linux-2.6-x86/arch/x86/kernel/traps_64.c 2008-03-04 15:31:57.000000000 > -0800 @@ -1117,10 +1117,15 @@ > asmlinkage void math_state_restore(void) > { > struct task_struct *me = current; > - clts(); /* Allow maths ops (or we recurse) */ > > - if (!used_math()) > - init_fpu(me); > + if (!used_math()) { > + if (init_fpu(me)) { > + force_sig(SIGSEGV, me); > + return; > + } > + } > + > + clts(); /* Allow maths ops (or we recurse) */ > restore_fpu_checking(&me->thread.xstate->fxsave); > task_thread_info(me)->status |= TS_USEDFPU; > me->fpu_counter++; > Index: linux-2.6-x86/include/asm-x86/i387.h > =================================================================== > --- linux-2.6-x86.orig/include/asm-x86/i387.h 2008-03-04 15:31:56.000000000 > -0800 +++ linux-2.6-x86/include/asm-x86/i387.h 2008-03-04 > 15:31:57.000000000 -0800 @@ -21,7 +21,7 @@ > > extern void fpu_init(void); > extern void mxcsr_feature_mask_init(void); > -extern void init_fpu(struct task_struct *child); > +extern int init_fpu(struct task_struct *child); > extern asmlinkage void math_state_restore(void); > extern void init_thread_xstate(void); Regards -- "ROUCARIES Bastien"