2e084786f6
I found a bug which can be reproduced this way (linux-2.6.26-rc5, x86-64; use 2^32, 2^33, ..., 2^63 as the shares value): # mkdir /dev/cpuctl # mount -t cgroup -o cpu cpuctl /dev/cpuctl # cd /dev/cpuctl # mkdir sub # echo 0x8000000000000000 > sub/cpu.shares # echo $$ > sub/tasks Oops here: divide by zero. This is because do_div() expects its 2nd parameter to be 32 bits, but unsigned long is 64 bits on x86_64. Peter Zijlstra pointed out that the sane thing to do is to limit the shares value to something smaller instead of using an even more expensive divide. Also, I found another bug caused by the shares value being too large: pid1 and pid2 both have their affinity set to cpu#0; pid1 is attached to cg1 and pid2 is attached to cg2. If cg1/cpu.shares = 1024 and cg2/cpu.shares = 2000000000, then pid2 gets 100% usage of the cpu and pid1 0%. If cg1/cpu.shares = 1024 and cg2/cpu.shares = 20000000000, then pid2 gets 0% usage of the cpu and pid1 100%. Since the weight of a cfs_rq is the sum of the weights of the entities queued on that cfs_rq, the shares value should be limited to a smaller value. I think that (1UL << 18) is a good limit: 1) it's not too large, so we can create a lot of groups before overflow; 2) it's several times the weight value for nice=-19 (not too small). Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Ingo Molnar <mingo@elte.hu> |
||
---|---|---|
.. | ||
irq | ||
power | ||
time | ||
.gitignore | ||
acct.c | ||
audit_tree.c | ||
audit.c | ||
audit.h | ||
auditfilter.c | ||
auditsc.c | ||
backtracetest.c | ||
bounds.c | ||
capability.c | ||
cgroup_debug.c | ||
cgroup.c | ||
compat.c | ||
configs.c | ||
cpu.c | ||
cpuset.c | ||
delayacct.c | ||
dma.c | ||
exec_domain.c | ||
exit.c | ||
extable.c | ||
fork.c | ||
futex_compat.c | ||
futex.c | ||
hrtimer.c | ||
itimer.c | ||
kallsyms.c | ||
Kconfig.hz | ||
Kconfig.preempt | ||
kexec.c | ||
kfifo.c | ||
kgdb.c | ||
kmod.c | ||
kprobes.c | ||
ksysfs.c | ||
kthread.c | ||
latencytop.c | ||
lockdep_internals.h | ||
lockdep_proc.c | ||
lockdep.c | ||
Makefile | ||
marker.c | ||
module.c | ||
mutex-debug.c | ||
mutex-debug.h | ||
mutex.c | ||
mutex.h | ||
notifier.c | ||
ns_cgroup.c | ||
nsproxy.c | ||
panic.c | ||
params.c | ||
pid_namespace.c | ||
pid.c | ||
pm_qos_params.c | ||
posix-cpu-timers.c | ||
posix-timers.c | ||
printk.c | ||
profile.c | ||
ptrace.c | ||
rcuclassic.c | ||
rcupdate.c | ||
rcupreempt_trace.c | ||
rcupreempt.c | ||
rcutorture.c | ||
relay.c | ||
res_counter.c | ||
resource.c | ||
rtmutex_common.h | ||
rtmutex-debug.c | ||
rtmutex-debug.h | ||
rtmutex-tester.c | ||
rtmutex.c | ||
rtmutex.h | ||
rwsem.c | ||
sched_clock.c | ||
sched_debug.c | ||
sched_fair.c | ||
sched_features.h | ||
sched_idletask.c | ||
sched_rt.c | ||
sched_stats.h | ||
sched.c | ||
seccomp.c | ||
semaphore.c | ||
signal.c | ||
softirq.c | ||
softlockup.c | ||
spinlock.c | ||
srcu.c | ||
stacktrace.c | ||
stop_machine.c | ||
sys_ni.c | ||
sys.c | ||
sysctl_check.c | ||
sysctl.c | ||
taskstats.c | ||
test_kprobes.c | ||
time.c | ||
timeconst.pl | ||
timer.c | ||
tsacct.c | ||
uid16.c | ||
user_namespace.c | ||
user.c | ||
utsname_sysctl.c | ||
utsname.c | ||
wait.c | ||
workqueue.c |