DEFINE_TRACE(sched_migrate_task);
#ifdef CONFIG_SMP
+
+static void double_rq_lock(struct rq *rq1, struct rq *rq2);
+
/*
* Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
* Since cpu_power is a 'constant', we can use a reciprocal divide.
* slice expiry etc.
*/
-#define WEIGHT_IDLEPRIO 2
-#define WMULT_IDLEPRIO (1 << 31)
+#define WEIGHT_IDLEPRIO 3
+#define WMULT_IDLEPRIO 1431655765
/*
* Nice levels are multiplicative, with a gentle 10% change for every
}
double_unlock_balance(this_rq, busiest);
+ /*
+ * Should not call ttwu while holding a rq->lock
+ */
+ spin_unlock(&this_rq->lock);
if (active_balance)
wake_up_process(busiest->migration_thread);
+ spin_lock(&this_rq->lock);
} else
sd->nr_balance_failed = 0;
* Account user cpu time to a process.
* @p: the process that the cpu time gets accounted to
* @cputime: the cpu time spent in user space since the last update
+ * @cputime_scaled: cputime scaled by cpu frequency
*/
-void account_user_time(struct task_struct *p, cputime_t cputime)
+void account_user_time(struct task_struct *p, cputime_t cputime,
+ cputime_t cputime_scaled)
{
struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
cputime64_t tmp;
+ /* Add user time to process. */
p->utime = cputime_add(p->utime, cputime);
+ p->utimescaled = cputime_add(p->utimescaled, cputime_scaled);
account_group_user_time(p, cputime);
/* Add user time to cpustat. */
* Account guest cpu time to a process.
* @p: the process that the cpu time gets accounted to
* @cputime: the cpu time spent in virtual machine since the last update
+ * @cputime_scaled: cputime scaled by cpu frequency
*/
-static void account_guest_time(struct task_struct *p, cputime_t cputime)
+static void account_guest_time(struct task_struct *p, cputime_t cputime,
+ cputime_t cputime_scaled)
{
cputime64_t tmp;
struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
tmp = cputime_to_cputime64(cputime);
+ /* Add guest time to process. */
p->utime = cputime_add(p->utime, cputime);
+ p->utimescaled = cputime_add(p->utimescaled, cputime_scaled);
account_group_user_time(p, cputime);
p->gtime = cputime_add(p->gtime, cputime);
+ /* Add guest time to cpustat. */
cpustat->user = cputime64_add(cpustat->user, tmp);
cpustat->guest = cputime64_add(cpustat->guest, tmp);
}
-/*
- * Account scaled user cpu time to a process.
- * @p: the process that the cpu time gets accounted to
- * @cputime: the cpu time spent in user space since the last update
- */
-void account_user_time_scaled(struct task_struct *p, cputime_t cputime)
-{
- p->utimescaled = cputime_add(p->utimescaled, cputime);
-}
-
/*
* Account system cpu time to a process.
* @p: the process that the cpu time gets accounted to
* @hardirq_offset: the offset to subtract from hardirq_count()
* @cputime: the cpu time spent in kernel space since the last update
+ * @cputime_scaled: cputime scaled by cpu frequency
*/
void account_system_time(struct task_struct *p, int hardirq_offset,
- cputime_t cputime)
+ cputime_t cputime, cputime_t cputime_scaled)
{
struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
- struct rq *rq = this_rq();
cputime64_t tmp;
if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
- account_guest_time(p, cputime);
+ account_guest_time(p, cputime, cputime_scaled);
return;
}
+ /* Add system time to process. */
p->stime = cputime_add(p->stime, cputime);
+ p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
account_group_system_time(p, cputime);
/* Add system time to cpustat. */
cpustat->irq = cputime64_add(cpustat->irq, tmp);
else if (softirq_count())
cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
- else if (p != rq->idle)
- cpustat->system = cputime64_add(cpustat->system, tmp);
- else if (atomic_read(&rq->nr_iowait) > 0)
- cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
else
- cpustat->idle = cputime64_add(cpustat->idle, tmp);
+ cpustat->system = cputime64_add(cpustat->system, tmp);
+
/* Account for system time used */
acct_update_integrals(p);
}
/*
- * Account scaled system cpu time to a process.
- * @p: the process that the cpu time gets accounted to
- * @hardirq_offset: the offset to subtract from hardirq_count()
- * @cputime: the cpu time spent in kernel space since the last update
+ * Account for involuntary wait time.
+ * @steal: the cpu time spent in involuntary wait
*/
-void account_system_time_scaled(struct task_struct *p, cputime_t cputime)
+void account_steal_time(cputime_t cputime)
{
- p->stimescaled = cputime_add(p->stimescaled, cputime);
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+ cputime64_t cputime64 = cputime_to_cputime64(cputime);
+
+ cpustat->steal = cputime64_add(cpustat->steal, cputime64);
}
/*
- * Account for involuntary wait time.
- * @p: the process from which the cpu time has been stolen
- * @steal: the cpu time spent in involuntary wait
+ * Account for idle time.
+ * @cputime: the cpu time spent in idle wait
*/
-void account_steal_time(struct task_struct *p, cputime_t steal)
+void account_idle_time(cputime_t cputime)
{
struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
- cputime64_t tmp = cputime_to_cputime64(steal);
+ cputime64_t cputime64 = cputime_to_cputime64(cputime);
struct rq *rq = this_rq();
- if (p == rq->idle) {
- p->stime = cputime_add(p->stime, steal);
- if (atomic_read(&rq->nr_iowait) > 0)
- cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
- else
- cpustat->idle = cputime64_add(cpustat->idle, tmp);
- } else
- cpustat->steal = cputime64_add(cpustat->steal, tmp);
+ if (atomic_read(&rq->nr_iowait) > 0)
+ cpustat->iowait = cputime64_add(cpustat->iowait, cputime64);
+ else
+ cpustat->idle = cputime64_add(cpustat->idle, cputime64);
+}
+
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+
+/*
+ * Account a single tick of cpu time.
+ * @p: the process that the cpu time gets accounted to
+ * @user_tick: indicates if the tick is a user or a system tick
+ */
+void account_process_tick(struct task_struct *p, int user_tick)
+{
+ cputime_t one_jiffy = jiffies_to_cputime(1);
+ cputime_t one_jiffy_scaled = cputime_to_scaled(one_jiffy);
+ struct rq *rq = this_rq();
+
+ if (user_tick)
+ account_user_time(p, one_jiffy, one_jiffy_scaled);
+ else if (p != rq->idle)
+ account_system_time(p, HARDIRQ_OFFSET, one_jiffy,
+ one_jiffy_scaled);
+ else
+ account_idle_time(one_jiffy);
+}
+
+/*
+ * Account multiple ticks of steal time.
+ * @p: the process from which the cpu time has been stolen
+ * @ticks: number of stolen ticks
+ */
+void account_steal_ticks(unsigned long ticks)
+{
+ account_steal_time(jiffies_to_cputime(ticks));
+}
+
+/*
+ * Account multiple ticks of idle time.
+ * @ticks: number of stolen ticks
+ */
+void account_idle_ticks(unsigned long ticks)
+{
+ account_idle_time(jiffies_to_cputime(ticks));
}
+#endif
+
/*
* Use precise platform statistics if available:
*/
/*
* Underflow?
*/
- if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked())))
+ if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
return;
/*
* Is the spinlock portion underflowing?
* sys_setpriority is a more generic, but much slower function that
* does similar things.
*/
-asmlinkage long sys_nice(int increment)
+SYSCALL_DEFINE1(nice, int, increment)
{
long nice, retval;
* @policy: new policy.
* @param: structure containing the new RT priority.
*/
-asmlinkage long
-sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
+SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy,
+ struct sched_param __user *, param)
{
/* negative values for policy are not valid */
if (policy < 0)
* @pid: the pid in question.
* @param: structure containing the new RT priority.
*/
-asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
+SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
{
return do_sched_setscheduler(pid, -1, param);
}
* sys_sched_getscheduler - get the policy (scheduling class) of a thread
* @pid: the pid in question.
*/
-asmlinkage long sys_sched_getscheduler(pid_t pid)
+SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
{
struct task_struct *p;
int retval;
* @pid: the pid in question.
* @param: structure containing the RT priority.
*/
-asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param)
+SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
{
struct sched_param lp;
struct task_struct *p;
* @len: length in bytes of the bitmask pointed to by user_mask_ptr
* @user_mask_ptr: user-space pointer to the new cpu mask
*/
-asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len,
- unsigned long __user *user_mask_ptr)
+SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
+ unsigned long __user *, user_mask_ptr)
{
cpumask_var_t new_mask;
int retval;
* @len: length in bytes of the bitmask pointed to by user_mask_ptr
* @user_mask_ptr: user-space pointer to hold the current cpu mask
*/
-asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
- unsigned long __user *user_mask_ptr)
+SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
+ unsigned long __user *, user_mask_ptr)
{
int ret;
cpumask_var_t mask;
* This function yields the current CPU to other tasks. If there are no
* other threads running on this CPU then this function will return.
*/
-asmlinkage long sys_sched_yield(void)
+SYSCALL_DEFINE0(sched_yield)
{
struct rq *rq = this_rq_lock();
* this syscall returns the maximum rt_priority that can be used
* by a given scheduling class.
*/
-asmlinkage long sys_sched_get_priority_max(int policy)
+SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
{
int ret = -EINVAL;
* this syscall returns the minimum rt_priority that can be used
* by a given scheduling class.
*/
-asmlinkage long sys_sched_get_priority_min(int policy)
+SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
{
int ret = -EINVAL;
* this syscall writes the default timeslice value of a given process
* into the user-space timespec buffer. A value of '0' means infinity.
*/
-asmlinkage
-long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
+SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
+ struct timespec __user *, interval)
{
struct task_struct *p;
unsigned int time_slice;
spin_unlock_irqrestore(&rq->lock, flags);
}
-static int init_rootdomain(struct root_domain *rd, bool bootmem)
+static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem)
{
memset(rd, 0, sizeof(*rd));
}
if (!alloc_cpumask_var(&rd->span, GFP_KERNEL))
- goto free_rd;
+ goto out;
if (!alloc_cpumask_var(&rd->online, GFP_KERNEL))
goto free_span;
if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
free_cpumask_var(rd->online);
free_span:
free_cpumask_var(rd->span);
-free_rd:
- kfree(rd);
+out:
return -ENOMEM;
}
* groups, so roll our own. Now each node has its own list of groups which
* gets dynamically allocated.
*/
-static DEFINE_PER_CPU(struct sched_domain, node_domains);
+static DEFINE_PER_CPU(struct static_sched_domain, node_domains);
static struct sched_group ***sched_group_nodes_bycpu;
-static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
+static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains);
static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
#ifdef CONFIG_NUMA
if (cpumask_weight(cpu_map) >
SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) {
- sd = &per_cpu(allnodes_domains, i);
+ sd = &per_cpu(allnodes_domains, i).sd;
SD_INIT(sd, ALLNODES);
set_domain_attribute(sd, attr);
cpumask_copy(sched_domain_span(sd), cpu_map);
} else
p = NULL;
- sd = &per_cpu(node_domains, i);
+ sd = &per_cpu(node_domains, i).sd;
SD_INIT(sd, NODE);
set_domain_attribute(sd, attr);
sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
for_each_cpu(j, nodemask) {
struct sched_domain *sd;
- sd = &per_cpu(node_domains, j);
+ sd = &per_cpu(node_domains, j).sd;
sd->groups = sg;
}
sg->__cpu_power = 0;
}
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-int arch_reinit_sched_domains(void)
+static void arch_reinit_sched_domains(void)
{
get_online_cpus();
rebuild_sched_domains();
put_online_cpus();
-
- return 0;
}
static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
{
- int ret;
unsigned int level = 0;
if (sscanf(buf, "%u", &level) != 1)
else
sched_mc_power_savings = level;
- ret = arch_reinit_sched_domains();
+ arch_reinit_sched_domains();
- return ret ? ret : count;
+ return count;
}
#ifdef CONFIG_SCHED_MC
sched_smt_power_savings_store);
#endif
-int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
+int __init sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
{
int err = 0;
runtime = d->rt_runtime;
}
+#ifdef CONFIG_USER_SCHED
+ if (tg == &root_task_group) {
+ period = global_rt_period();
+ runtime = global_rt_runtime();
+ }
+#endif
+
/*
* Cannot have more runtime than the period.
*/