diff --git a/include/errno.h b/include/errno.h
index d95ba2c6f..4985335ec 100644
--- a/include/errno.h
+++ b/include/errno.h
@@ -130,4 +130,6 @@ extern int errno;		  /* place where the error numbers go */
 #define EBADEPT      (_SIGN 301)  /* specified endpoint is bad */
 #define EDEADEPT     (_SIGN 302)  /* specified endpoint is not alive */
 
+#define EBADCPU      (_SIGN 1000) /* requested CPU does not work */
+
 #endif /* _ERRNO_H */
diff --git a/kernel/arch/i386/arch_smp.c b/kernel/arch/i386/arch_smp.c
index 4f1445742..1d4a59bf8 100644
--- a/kernel/arch/i386/arch_smp.c
+++ b/kernel/arch/i386/arch_smp.c
@@ -36,8 +36,8 @@ extern void * __trampoline_end;
 extern u32_t busclock[CONFIG_MAX_CPUS];
 extern int panicking;
 
-static int ap_cpu_ready;
-static int cpu_down;
+static int volatile ap_cpu_ready;
+static int volatile cpu_down;
 
 /* there can be at most 255 local APIC ids, each fits in 8 bits */
 PRIVATE unsigned char apicid2cpuid[255];
@@ -186,6 +186,11 @@ PUBLIC void smp_shutdown_aps(void)
 	for (cpu = 0; cpu < ncpus; cpu++) {
 		if (cpu == cpuid)
 			continue;
+		if (!cpu_test_flag(cpu, CPU_IS_READY)) {
+			printf("CPU %d didn't boot\n", cpu);
+			continue;
+		}
+
 		cpu_down = -1;
 		barrier();
 		apic_send_ipi(APIC_SMP_CPU_HALT_VECTOR, cpu, APIC_IPI_DEST);
diff --git a/kernel/smp.c b/kernel/smp.c
index 38d503457..2dd730491 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -27,9 +27,20 @@ SPINLOCK_DEFINE(boot_lock)
 
 PUBLIC void wait_for_APs_to_finish_booting(void)
 {
+	unsigned n = 0;
+	int i;
+
+	/* check how many cpus are actually alive */
+	for (i = 0 ; i < ncpus ; i++) {
+		if (cpu_test_flag(i, CPU_IS_READY))
+			n++;
+	}
+	if (n != ncpus)
+		printf("WARNING only %d out of %d cpus booted\n", n, ncpus);
+
 	/* we must let the other CPUs to run in kernel mode first */
 	BKL_UNLOCK();
-	while (ap_cpus_booted != (ncpus - 1))
+	while (ap_cpus_booted != (n - 1))
 		arch_pause();
 	/* now we have to take the lock again as we continu execution */
 	BKL_LOCK();
diff --git a/kernel/system.c b/kernel/system.c
index 0c79ae7a6..cf0fe9ba0 100644
--- a/kernel/system.c
+++ b/kernel/system.c
@@ -653,6 +653,8 @@ PUBLIC int sched_proc(struct proc *p,
 #ifdef CONFIG_SMP
 	if ((cpu < 0 && cpu != -1) || (cpu > 0 && (unsigned) cpu >= ncpus))
 		return(EINVAL);
+	if (cpu != -1 && !(cpu_is_ready(cpu)))
+		return EBADCPU;
 #endif
 
 	/* In some cases, we might be rescheduling a runnable process. In such
diff --git a/servers/pm/glo.h b/servers/pm/glo.h
index 76feac5c1..69631d990 100644
--- a/servers/pm/glo.h
+++ b/servers/pm/glo.h
@@ -29,5 +29,5 @@ EXTERN char monitor_code[256];
 EXTERN struct machine machine;	/* machine info */
 
 #ifdef CONFIG_SMP
-EXTERN unsigned cpu_proc[CONFIG_MAX_CPUS];
+EXTERN int cpu_proc[CONFIG_MAX_CPUS];
 #endif
diff --git a/servers/sched/schedule.c b/servers/sched/schedule.c
index e73e3ac4e..39e8986e7 100644
--- a/servers/sched/schedule.c
+++ b/servers/sched/schedule.c
@@ -38,6 +38,10 @@ FORWARD _PROTOTYPE( void balance_queues, (struct timer *tp) );
 #define schedule_process_migrate(p)	\
 	schedule_process(p, SCHEDULE_CHANGE_CPU)
 
+#define CPU_DEAD	-1
+
+#define cpu_is_available(c)	(cpu_proc[c] >= 0)
+
 #define DEFAULT_USER_TIME_SLICE 200
 
 /* processes created by RS are sysytem processes */
@@ -62,7 +66,12 @@ PRIVATE void pick_cpu(struct schedproc * proc)
 		return;
 	}
 
+	/* if no other cpu available, try BSP */
+	cpu = machine.bsp_id;
 	for (c = 0; c < machine.processors_count; c++) {
+		/* skip dead cpus */
+		if (!cpu_is_available(c))
+			continue;
 		if (c != machine.bsp_id && cpu_load > cpu_proc[c]) {
 			cpu_load = cpu_proc[c];
 			cpu = c;
@@ -218,7 +227,13 @@ PUBLIC int do_start_scheduling(message *m_ptr)
 
 	/* Schedule the process, giving it some quantum */
 	pick_cpu(rmp);
-	if ((rv = schedule_process(rmp, SCHEDULE_CHANGE_ALL)) != OK) {
+	while ((rv = schedule_process(rmp, SCHEDULE_CHANGE_ALL)) == EBADCPU) {
+		/* don't try this CPU ever again */
+		cpu_proc[rmp->cpu] = CPU_DEAD;
+		pick_cpu(rmp);
+	}
+
+	if (rv != OK) {
 		printf("Sched: Error while scheduling process, kernel replied %d\n",
 			rv);
 		return rv;
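To illustrate the dead-CPU fallback that the schedule.c hunks introduce, here is a minimal standalone sketch of the pick-and-retry loop: pick the least-loaded available CPU, and whenever the kernel answers EBADCPU, mark that CPU as CPU_DEAD and pick again. The small cpu_proc[] table, NR_CPUS, BSP_ID, the positive EBADCPU value and fake_kernel_schedule() are illustrative stand-ins for this sketch only, not the MINIX kernel interface.

/*
 * Sketch of the fallback used in servers/sched/schedule.c above:
 * the per-CPU load table doubles as liveness state, with CPU_DEAD (-1)
 * marking CPUs the scheduler must never pick again.
 */
#include <stdio.h>

#define NR_CPUS   4
#define BSP_ID    0
#define CPU_DEAD  -1
#define EBADCPU   1000	/* stand-in value; MINIX defines it via _SIGN */
#define OK        0

/* per-CPU process count; CPU_DEAD marks a CPU that did not boot */
static int cpu_proc[NR_CPUS] = { 3, 5, 1, 2 };

#define cpu_is_available(c)	(cpu_proc[(c)] >= 0)

/* Pretend CPU 2 never booted: the "kernel" refuses to schedule on it. */
static int fake_kernel_schedule(int cpu)
{
	return (cpu == 2) ? EBADCPU : OK;
}

/* Same shape as pick_cpu(): default to the BSP, then take the
 * least-loaded non-BSP CPU that is still marked available. */
static int pick_cpu(void)
{
	int c, cpu = BSP_ID;
	int cpu_load = cpu_proc[BSP_ID];

	for (c = 0; c < NR_CPUS; c++) {
		if (!cpu_is_available(c))
			continue;	/* skip dead cpus */
		if (c != BSP_ID && cpu_load > cpu_proc[c]) {
			cpu_load = cpu_proc[c];
			cpu = c;
		}
	}
	return cpu;
}

int main(void)
{
	int cpu, rv;

	cpu = pick_cpu();
	/* retry loop mirroring do_start_scheduling() */
	while ((rv = fake_kernel_schedule(cpu)) == EBADCPU) {
		/* don't try this CPU ever again */
		printf("CPU %d rejected, marking it dead\n", cpu);
		cpu_proc[cpu] = CPU_DEAD;
		cpu = pick_cpu();
	}
	printf("scheduled on CPU %d (rv=%d)\n", cpu, rv);
	return 0;
}

With the table above, the first pick lands on CPU 2; once that CPU is marked dead the retry settles on CPU 3, mirroring how do_start_scheduling() keeps calling pick_cpu() until schedule_process() stops returning EBADCPU.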