From: Nick Piggin Date: Wed, 9 Nov 2005 05:39:01 +0000 (-0800) Subject: [PATCH] sched: disable preempt in idle tasks X-Git-Tag: v2.6.15-rc1~118 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=5bfb5d690f36d316a5f3b4f7775fda996faa6b12;p=thirdparty%2Fkernel%2Flinux.git [PATCH] sched: disable preempt in idle tasks Run idle threads with preempt disabled. Also corrected a bugs in arm26's cpu_idle (make it actually call schedule()). How did it ever work before? Might fix the CPU hotplugging hang which Nigel Cunningham noted. We think the bug hits if the idle thread is preempted after checking need_resched() and before going to sleep, then the CPU offlined. After calling stop_machine_run, the CPU eventually returns from preemption and into the idle thread and goes to sleep. The CPU will continue executing previous idle and have no chance to call play_dead. By disabling preemption until we are ready to explicitly schedule, this bug is fixed and the idle threads generally become more robust. From: alexs PPC build fix From: Yoichi Yuasa MIPS build fix Signed-off-by: Nick Piggin Signed-off-by: Yoichi Yuasa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index ba298277becde..93dd92cc12f8b 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -116,13 +116,13 @@ void cpu_idle(void) if (!idle) idle = default_idle; - preempt_disable(); leds_event(led_idle_start); while (!need_resched()) idle(); leds_event(led_idle_end); - preempt_enable(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 77e2e9ca89fab..e55ea952f7aa1 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -256,7 +256,9 @@ void __cpuexit cpu_die(void) asmlinkage void __cpuinit secondary_start_kernel(void) { struct mm_struct *mm = &init_mm; - unsigned int cpu = smp_processor_id(); + unsigned int cpu; + + cpu = smp_processor_id(); printk("CPU%u: Booted secondary processor\n", cpu); @@ -273,6 +275,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void) local_flush_tlb_all(); cpu_init(); + preempt_disable(); /* * Give the platform a chance to do its own initialisation. diff --git a/arch/arm26/kernel/process.c b/arch/arm26/kernel/process.c index 9eb9964d32a73..15833a0057dd1 100644 --- a/arch/arm26/kernel/process.c +++ b/arch/arm26/kernel/process.c @@ -74,15 +74,13 @@ __setup("hlt", hlt_setup); void cpu_idle(void) { /* endless idle loop with no priority at all */ - preempt_disable(); while (1) { - while (!need_resched()) { - local_irq_disable(); - if (!need_resched() && !hlt_counter) - local_irq_enable(); - } + while (!need_resched()) + cpu_relax(); + preempt_enable_no_resched(); + schedule(); + preempt_disable(); } - schedule(); } static char reboot_mode = 'h'; diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c index 957f551ba5ce2..13867f4fad16a 100644 --- a/arch/cris/arch-v32/kernel/smp.c +++ b/arch/cris/arch-v32/kernel/smp.c @@ -161,6 +161,7 @@ void __init smp_callin(void) REG_WR(intr_vect, irq_regs[cpu], rw_mask, vect_mask); unmask_irq(IPI_INTR_VECT); unmask_irq(TIMER_INTR_VECT); + preempt_disable(); local_irq_enable(); cpu_set(cpu, cpu_online_map); diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c index 949a0e40e03cc..7c80afb104607 100644 --- a/arch/cris/kernel/process.c +++ b/arch/cris/kernel/process.c @@ -218,7 +218,9 @@ void cpu_idle (void) idle = default_idle; idle(); } + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index 3001b82b15148..54a452136f00e 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -77,16 +77,20 @@ void (*idle)(void) = core_sleep_idle; */ void cpu_idle(void) { + int cpu = smp_processor_id(); + /* endless idle loop with no priority at all */ while (1) { while (!need_resched()) { - irq_stat[smp_processor_id()].idle_timestamp = jiffies; + irq_stat[cpu].idle_timestamp = jiffies; if (!frv_dma_inprogress && idle) idle(); } + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 27f1fce64ce46..fe21adf3e75e8 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -53,22 +53,18 @@ asmlinkage void ret_from_fork(void); #if !defined(CONFIG_H8300H_SIM) && !defined(CONFIG_H8S_SIM) void default_idle(void) { - while(1) { - if (!need_resched()) { - local_irq_enable(); - __asm__("sleep"); - local_irq_disable(); - } - schedule(); - } + local_irq_disable(); + if (!need_resched()) { + local_irq_enable(); + /* XXX: race here! What if need_resched() gets set now? */ + __asm__("sleep"); + } else + local_irq_enable(); } #else void default_idle(void) { - while(1) { - if (need_resched()) - schedule(); - } + cpu_relax(); } #endif void (*idle)(void) = default_idle; @@ -81,7 +77,13 @@ void (*idle)(void) = default_idle; */ void cpu_idle(void) { - idle(); + while (1) { + while (!need_resched()) + idle(); + preempt_enable_no_resched(); + schedule(); + preempt_disable(); + } } void machine_restart(char * __unused) diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 7a14fdfd3af95..5296e284ea363 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -179,7 +179,7 @@ static inline void play_dead(void) */ void cpu_idle(void) { - int cpu = raw_smp_processor_id(); + int cpu = smp_processor_id(); /* endless idle loop with no priority at all */ while (1) { @@ -201,7 +201,9 @@ void cpu_idle(void) __get_cpu_var(irq_stat).idle_timestamp = jiffies; idle(); } + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 47ec76794d02a..bc5a9d97466b5 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -485,6 +485,7 @@ static void __devinit start_secondary(void *unused) * things done here to the most necessary things. */ cpu_init(); + preempt_disable(); smp_callin(); while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) rep_nop(); diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 051e050359e49..4c621fc3c3b90 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -292,7 +292,9 @@ cpu_idle (void) #ifdef CONFIG_SMP normal_xtp(); #endif + preempt_enable_no_resched(); schedule(); + preempt_disable(); check_pgt_cache(); if (cpu_is_offline(smp_processor_id())) play_dead(); diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 400a489871249..8f44e7d2df66e 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -399,6 +399,7 @@ start_secondary (void *unused) Dprintk("start_secondary: starting CPU 0x%x\n", hard_smp_processor_id()); efi_map_pal_code(); cpu_init(); + preempt_disable(); smp_callin(); cpu_idle(); diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index ea13a8f4d8b05..cc4b571e5db71 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -104,7 +104,9 @@ void cpu_idle (void) idle(); } + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c index 640d592ea0725..b90c54169fa5a 100644 --- a/arch/m32r/kernel/smpboot.c +++ b/arch/m32r/kernel/smpboot.c @@ -426,6 +426,7 @@ void __init smp_cpus_done(unsigned int max_cpus) int __init start_secondary(void *unused) { cpu_init(); + preempt_disable(); smp_callin(); while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) cpu_relax(); diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index 11b1b90ba6ba4..13d109328a428 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -102,7 +102,9 @@ void cpu_idle(void) while (1) { while (!need_resched()) idle(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 4fe3d5715c416..dd725779d91fa 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -52,7 +52,9 @@ ATTRIB_NORET void cpu_idle(void) while (!need_resched()) if (cpu_wait) (*cpu_wait)(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index fcacf1aae98ae..25472fcaf7157 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -82,7 +82,7 @@ extern ATTRIB_NORET void cpu_idle(void); */ asmlinkage void start_secondary(void) { - unsigned int cpu = smp_processor_id(); + unsigned int cpu; cpu_probe(); cpu_report(); @@ -95,6 +95,8 @@ asmlinkage void start_secondary(void) */ calibrate_delay(); + preempt_disable(); + cpu = smp_processor_id(); cpu_data[cpu].udelay_val = loops_per_jiffy; prom_smp_finish(); diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 7fdca87ef6473..f482f78de4353 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -92,7 +92,9 @@ void cpu_idle(void) while (1) { while (!need_resched()) barrier(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); check_pgt_cache(); } } diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 5db3be4e2704b..a9ecf6465784e 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -463,6 +463,7 @@ void __init smp_callin(void) #endif smp_cpu_init(slave_id); + preempt_disable(); #if 0 /* NOT WORKING YET - see entry.S */ istack = (void *)__get_free_pages(GFP_KERNEL,ISTACK_ORDER); diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c index d3e4bf756c838..0130f2619dacd 100644 --- a/arch/powerpc/platforms/iseries/setup.c +++ b/arch/powerpc/platforms/iseries/setup.c @@ -694,7 +694,9 @@ static void iseries_shared_idle(void) if (hvlpevent_is_pending()) process_iSeries_events(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } @@ -726,7 +728,9 @@ static void iseries_dedicated_idle(void) } ppc64_runlatch_on(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index e78c39368841b..4854f5eb5c3da 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -539,7 +539,9 @@ static void pseries_dedicated_idle(void) lpaca->lppaca.idle = 0; ppc64_runlatch_on(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) cpu_die(); @@ -583,7 +585,9 @@ static void pseries_shared_idle(void) lpaca->lppaca.idle = 0; ppc64_runlatch_on(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) cpu_die(); diff --git a/arch/ppc/kernel/idle.c b/arch/ppc/kernel/idle.c index 11e5b44713f72..a6141f05c9197 100644 --- a/arch/ppc/kernel/idle.c +++ b/arch/ppc/kernel/idle.c @@ -53,10 +53,6 @@ void default_idle(void) } #endif } - if (need_resched()) - schedule(); - if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) - cpu_die(); } /* @@ -64,11 +60,22 @@ void default_idle(void) */ void cpu_idle(void) { - for (;;) + int cpu = smp_processor_id(); + + for (;;) { if (ppc_md.idle != NULL) ppc_md.idle(); else default_idle(); + if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) + cpu_die(); + if (need_resched()) { + preempt_enable_no_resched(); + schedule(); + preempt_disable(); + } + + } } #if defined(CONFIG_SYSCTL) && defined(CONFIG_6xx) diff --git a/arch/ppc/kernel/smp.c b/arch/ppc/kernel/smp.c index bc5bf1124836a..43b8fc2ca591c 100644 --- a/arch/ppc/kernel/smp.c +++ b/arch/ppc/kernel/smp.c @@ -341,6 +341,7 @@ int __devinit start_secondary(void *unused) cpu = smp_processor_id(); smp_store_cpu_info(cpu); set_dec(tb_ticks_per_jiffy); + preempt_disable(); cpu_callin_map[cpu] = 1; printk("CPU %d done callin...\n", cpu); diff --git a/arch/ppc64/kernel/idle.c b/arch/ppc64/kernel/idle.c index 8fec274698022..909ea669af915 100644 --- a/arch/ppc64/kernel/idle.c +++ b/arch/ppc64/kernel/idle.c @@ -61,7 +61,9 @@ void default_idle(void) } ppc64_runlatch_on(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) cpu_die(); } @@ -77,7 +79,9 @@ void native_idle(void) if (need_resched()) { ppc64_runlatch_on(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); } if (cpu_is_offline(smp_processor_id()) && diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 9f3dff6c0b722..66ca5757e3686 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -102,7 +102,6 @@ void default_idle(void) local_irq_disable(); if (need_resched()) { local_irq_enable(); - schedule(); return; } @@ -139,8 +138,14 @@ void default_idle(void) void cpu_idle(void) { - for (;;) - default_idle(); + for (;;) { + while (!need_resched()) + default_idle(); + + preempt_enable_no_resched(); + schedule(); + preempt_disable(); + } } void show_regs(struct pt_regs *regs) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index e13c87b446b2e..5856b3fda6bfe 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -533,6 +533,7 @@ int __devinit start_secondary(void *cpuvoid) { /* Setup the cpu */ cpu_init(); + preempt_disable(); /* init per CPU timer */ init_cpu_timer(); #ifdef CONFIG_VIRT_TIMER diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c index 6dce9d0b81f8b..1cbc26b796ad1 100644 --- a/arch/sh/kernel/process.c +++ b/arch/sh/kernel/process.c @@ -64,7 +64,9 @@ void default_idle(void) cpu_sleep(); } + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 5ecefc02896a3..59e49b18252c4 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -112,7 +112,9 @@ int __cpu_up(unsigned int cpu) int start_secondary(void *unused) { - unsigned int cpu = smp_processor_id(); + unsigned int cpu; + + cpu = smp_processor_id(); atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; @@ -120,6 +122,7 @@ int start_secondary(void *unused) smp_store_cpu_info(cpu); __smp_slave_init(cpu); + preempt_disable(); per_cpu_trap_init(); atomic_inc(&cpus_booted); diff --git a/arch/sh64/kernel/process.c b/arch/sh64/kernel/process.c index efde41c0cd669..0c09537449b3a 100644 --- a/arch/sh64/kernel/process.c +++ b/arch/sh64/kernel/process.c @@ -334,7 +334,9 @@ void default_idle(void) } local_irq_enable(); } + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } diff --git a/arch/sparc/kernel/process.c b/arch/sparc/kernel/process.c index 29e72b57d4fd2..c39f4d01096d7 100644 --- a/arch/sparc/kernel/process.c +++ b/arch/sparc/kernel/process.c @@ -120,7 +120,9 @@ void cpu_idle(void) (*pm_idle)(); } + preempt_enable_no_resched(); schedule(); + preempt_disable(); check_pgt_cache(); } } @@ -133,7 +135,9 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while(1) { if(need_resched()) { + preempt_enable_no_resched(); schedule(); + preempt_disable(); check_pgt_cache(); } barrier(); /* or else gcc optimizes... */ diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c index 7d10b03970919..2f89206e008f8 100644 --- a/arch/sparc64/kernel/process.c +++ b/arch/sparc64/kernel/process.c @@ -74,7 +74,9 @@ void cpu_idle(void) while (!need_resched()) barrier(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); check_pgt_cache(); } } @@ -93,7 +95,9 @@ void cpu_idle(void) if (need_resched()) { unidle_me(); clear_thread_flag(TIF_POLLING_NRFLAG); + preempt_enable_no_resched(); schedule(); + preempt_disable(); set_thread_flag(TIF_POLLING_NRFLAG); check_pgt_cache(); } diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 5d90ee9aebf1a..8aca4b1dc04e9 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -168,6 +168,9 @@ void __init smp_callin(void) rmb(); cpu_set(cpuid, cpu_online_map); + + /* idle thread is expected to have preempt disabled */ + preempt_disable(); } void cpu_panic(void) diff --git a/arch/v850/kernel/process.c b/arch/v850/kernel/process.c index 9c708c32c1f07..39cf247cdae4e 100644 --- a/arch/v850/kernel/process.c +++ b/arch/v850/kernel/process.c @@ -36,11 +36,8 @@ extern void ret_from_fork (void); /* The idle loop. */ void default_idle (void) { - while (1) { - while (! need_resched ()) - asm ("halt; nop; nop; nop; nop; nop" ::: "cc"); - schedule (); - } + while (! need_resched ()) + asm ("halt; nop; nop; nop; nop; nop" ::: "cc"); } void (*idle)(void) = default_idle; @@ -54,7 +51,14 @@ void (*idle)(void) = default_idle; void cpu_idle (void) { /* endless idle loop with no priority at all */ - (*idle) (); + while (1) { + while (!need_resched()) + (*idle) (); + + preempt_enable_no_resched(); + schedule(); + preempt_disable(); + } } /* diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index b5a89c0bdf591..571f9fe490ce1 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -204,7 +204,9 @@ void cpu_idle (void) idle(); } + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 4b5b088ec1022..c4e59bbdc1872 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -472,6 +472,7 @@ void __cpuinit start_secondary(void) * things done here to the most necessary things. */ cpu_init(); + preempt_disable(); smp_callin(); /* otherwise gcc will move up the smp_processor_id before the cpu_init */ diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index 08ef6d82ee514..6a44b54ae8173 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -96,8 +96,9 @@ void cpu_idle(void) while (1) { while (!need_resched()) platform_idle(); - preempt_enable(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } diff --git a/init/main.c b/init/main.c index f142d40353419..27f97f9b46362 100644 --- a/init/main.c +++ b/init/main.c @@ -394,14 +394,16 @@ static void noinline rest_init(void) kernel_thread(init, NULL, CLONE_FS | CLONE_SIGHAND); numa_default_policy(); unlock_kernel(); - preempt_enable_no_resched(); /* * The boot idle thread must execute schedule() * at least one to get things moving: */ + preempt_enable_no_resched(); schedule(); + preempt_disable(); + /* Call into cpu_idle with preempt disabled */ cpu_idle(); }