summaryrefslogtreecommitdiffstats
path: root/kernel/sched/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--kernel/sched/core.c772
1 files changed, 55 insertions, 717 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5b60f3a8343f3..17c667b427b4a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -10,6 +10,7 @@
#include <uapi/linux/sched/types.h>
#include <linux/sched/loadavg.h>
#include <linux/sched/hotplug.h>
+#include <linux/wait_bit.h>
#include <linux/cpuset.h>
#include <linux/delayacct.h>
#include <linux/init_task.h>
@@ -788,36 +789,6 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
dequeue_task(rq, p, flags);
}
-void sched_set_stop_task(int cpu, struct task_struct *stop)
-{
- struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
- struct task_struct *old_stop = cpu_rq(cpu)->stop;
-
- if (stop) {
- /*
- * Make it appear like a SCHED_FIFO task, its something
- * userspace knows about and won't get confused about.
- *
- * Also, it will make PI more or less work without too
- * much confusion -- but then, stop work should not
- * rely on PI working anyway.
- */
- sched_setscheduler_nocheck(stop, SCHED_FIFO, &param);
-
- stop->sched_class = &stop_sched_class;
- }
-
- cpu_rq(cpu)->stop = stop;
-
- if (old_stop) {
- /*
- * Reset it back to a normal scheduling class so that
- * it can die in pieces.
- */
- old_stop->sched_class = &rt_sched_class;
- }
-}
-
/*
* __normal_prio - return the priority that is based on the static prio
*/
@@ -1588,6 +1559,36 @@ static void update_avg(u64 *avg, u64 sample)
*avg += diff >> 3;
}
+void sched_set_stop_task(int cpu, struct task_struct *stop)
+{
+ struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
+ struct task_struct *old_stop = cpu_rq(cpu)->stop;
+
+ if (stop) {
+ /*
+ * Make it appear like a SCHED_FIFO task, its something
+ * userspace knows about and won't get confused about.
+ *
+ * Also, it will make PI more or less work without too
+ * much confusion -- but then, stop work should not
+ * rely on PI working anyway.
+ */
+ sched_setscheduler_nocheck(stop, SCHED_FIFO, &param);
+
+ stop->sched_class = &stop_sched_class;
+ }
+
+ cpu_rq(cpu)->stop = stop;
+
+ if (old_stop) {
+ /*
+ * Reset it back to a normal scheduling class so that
+ * it can die in pieces.
+ */
+ old_stop->sched_class = &rt_sched_class;
+ }
+}
+
#else
static inline int __set_cpus_allowed_ptr(struct task_struct *p,
@@ -1731,7 +1732,7 @@ void sched_ttwu_pending(void)
{
struct rq *rq = this_rq();
struct llist_node *llist = llist_del_all(&rq->wake_list);
- struct task_struct *p;
+ struct task_struct *p, *t;
struct rq_flags rf;
if (!llist)
@@ -1740,17 +1741,8 @@ void sched_ttwu_pending(void)
rq_lock_irqsave(rq, &rf);
update_rq_clock(rq);
- while (llist) {
- int wake_flags = 0;
-
- p = llist_entry(llist, struct task_struct, wake_entry);
- llist = llist_next(llist);
-
- if (p->sched_remote_wakeup)
- wake_flags = WF_MIGRATED;
-
- ttwu_do_activate(rq, p, wake_flags, &rf);
- }
+ llist_for_each_entry_safe(p, t, llist, wake_entry)
+ ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0, &rf);
rq_unlock_irqrestore(rq, &rf);
}
@@ -2148,23 +2140,6 @@ int wake_up_state(struct task_struct *p, unsigned int state)
}
/*
- * This function clears the sched_dl_entity static params.
- */
-void __dl_clear_params(struct task_struct *p)
-{
- struct sched_dl_entity *dl_se = &p->dl;
-
- dl_se->dl_runtime = 0;
- dl_se->dl_deadline = 0;
- dl_se->dl_period = 0;
- dl_se->flags = 0;
- dl_se->dl_bw = 0;
-
- dl_se->dl_throttled = 0;
- dl_se->dl_yielded = 0;
-}
-
-/*
* Perform scheduler related setup for a newly forked process p.
* p is forked by current.
*
@@ -2193,6 +2168,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
RB_CLEAR_NODE(&p->dl.rb_node);
init_dl_task_timer(&p->dl);
+ init_dl_inactive_task_timer(&p->dl);
__dl_clear_params(p);
INIT_LIST_HEAD(&p->rt.run_list);
@@ -2430,7 +2406,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
unsigned long to_ratio(u64 period, u64 runtime)
{
if (runtime == RUNTIME_INF)
- return 1ULL << 20;
+ return BW_UNIT;
/*
* Doing this here saves a lot of checks in all
@@ -2440,93 +2416,9 @@ unsigned long to_ratio(u64 period, u64 runtime)
if (period == 0)
return 0;
- return div64_u64(runtime << 20, period);
+ return div64_u64(runtime << BW_SHIFT, period);
}
-#ifdef CONFIG_SMP
-inline struct dl_bw *dl_bw_of(int i)
-{
- RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
- "sched RCU must be held");
- return &cpu_rq(i)->rd->dl_bw;
-}
-
-static inline int dl_bw_cpus(int i)
-{
- struct root_domain *rd = cpu_rq(i)->rd;
- int cpus = 0;
-
- RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
- "sched RCU must be held");
- for_each_cpu_and(i, rd->span, cpu_active_mask)
- cpus++;
-
- return cpus;
-}
-#else
-inline struct dl_bw *dl_bw_of(int i)
-{
- return &cpu_rq(i)->dl.dl_bw;
-}
-
-static inline int dl_bw_cpus(int i)
-{
- return 1;
-}
-#endif
-
-/*
- * We must be sure that accepting a new task (or allowing changing the
- * parameters of an existing one) is consistent with the bandwidth
- * constraints. If yes, this function also accordingly updates the currently
- * allocated bandwidth to reflect the new situation.
- *
- * This function is called while holding p's rq->lock.
- *
- * XXX we should delay bw change until the task's 0-lag point, see
- * __setparam_dl().
- */
-static int dl_overflow(struct task_struct *p, int policy,
- const struct sched_attr *attr)
-{
-
- struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
- u64 period = attr->sched_period ?: attr->sched_deadline;
- u64 runtime = attr->sched_runtime;
- u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
- int cpus, err = -1;
-
- /* !deadline task may carry old deadline bandwidth */
- if (new_bw == p->dl.dl_bw && task_has_dl_policy(p))
- return 0;
-
- /*
- * Either if a task, enters, leave, or stays -deadline but changes
- * its parameters, we may need to update accordingly the total
- * allocated bandwidth of the container.
- */
- raw_spin_lock(&dl_b->lock);
- cpus = dl_bw_cpus(task_cpu(p));
- if (dl_policy(policy) && !task_has_dl_policy(p) &&
- !__dl_overflow(dl_b, cpus, 0, new_bw)) {
- __dl_add(dl_b, new_bw);
- err = 0;
- } else if (dl_policy(policy) && task_has_dl_policy(p) &&
- !__dl_overflow(dl_b, cpus, p->dl.dl_bw, new_bw)) {
- __dl_clear(dl_b, p->dl.dl_bw);
- __dl_add(dl_b, new_bw);
- err = 0;
- } else if (!dl_policy(policy) && task_has_dl_policy(p)) {
- __dl_clear(dl_b, p->dl.dl_bw);
- err = 0;
- }
- raw_spin_unlock(&dl_b->lock);
-
- return err;
-}
-
-extern void init_dl_bw(struct dl_bw *dl_b);
-
/*
* wake_up_new_task - wake up a newly created task for the first time.
*
@@ -3687,7 +3579,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void)
exception_exit(prev_state);
}
-int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
+int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags,
void *key)
{
return try_to_wake_up(curr->private, mode, wake_flags);
@@ -4009,46 +3901,6 @@ static struct task_struct *find_process_by_pid(pid_t pid)
}
/*
- * This function initializes the sched_dl_entity of a newly becoming
- * SCHED_DEADLINE task.
- *
- * Only the static values are considered here, the actual runtime and the
- * absolute deadline will be properly calculated when the task is enqueued
- * for the first time with its new policy.
- */
-static void
-__setparam_dl(struct task_struct *p, const struct sched_attr *attr)
-{
- struct sched_dl_entity *dl_se = &p->dl;
-
- dl_se->dl_runtime = attr->sched_runtime;
- dl_se->dl_deadline = attr->sched_deadline;
- dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline;
- dl_se->flags = attr->sched_flags;
- dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
-
- /*
- * Changing the parameters of a task is 'tricky' and we're not doing
- * the correct thing -- also see task_dead_dl() and switched_from_dl().
- *
- * What we SHOULD do is delay the bandwidth release until the 0-lag
- * point. This would include retaining the task_struct until that time
- * and change dl_overflow() to not immediately decrement the current
- * amount.
- *
- * Instead we retain the current runtime/deadline and let the new
- * parameters take effect after the current reservation period lapses.
- * This is safe (albeit pessimistic) because the 0-lag point is always
- * before the current scheduling deadline.
- *
- * We can still have temporary overloads because we do not delay the
- * change in bandwidth until that time; so admission control is
- * not on the safe side. It does however guarantee tasks will never
- * consume more than promised.
- */
-}
-
-/*
* sched_setparam() passes in -1 for its policy, to let the functions
* it calls know not to change it.
*/
@@ -4101,59 +3953,6 @@ static void __setscheduler(struct rq *rq, struct task_struct *p,
p->sched_class = &fair_sched_class;
}
-static void
-__getparam_dl(struct task_struct *p, struct sched_attr *attr)
-{
- struct sched_dl_entity *dl_se = &p->dl;
-
- attr->sched_priority = p->rt_priority;
- attr->sched_runtime = dl_se->dl_runtime;
- attr->sched_deadline = dl_se->dl_deadline;
- attr->sched_period = dl_se->dl_period;
- attr->sched_flags = dl_se->flags;
-}
-
-/*
- * This function validates the new parameters of a -deadline task.
- * We ask for the deadline not being zero, and greater or equal
- * than the runtime, as well as the period of being zero or
- * greater than deadline. Furthermore, we have to be sure that
- * user parameters are above the internal resolution of 1us (we
- * check sched_runtime only since it is always the smaller one) and
- * below 2^63 ns (we have to check both sched_deadline and
- * sched_period, as the latter can be zero).
- */
-static bool
-__checkparam_dl(const struct sched_attr *attr)
-{
- /* deadline != 0 */
- if (attr->sched_deadline == 0)
- return false;
-
- /*
- * Since we truncate DL_SCALE bits, make sure we're at least
- * that big.
- */
- if (attr->sched_runtime < (1ULL << DL_SCALE))
- return false;
-
- /*
- * Since we use the MSB for wrap-around and sign issues, make
- * sure it's not set (mind that period can be equal to zero).
- */
- if (attr->sched_deadline & (1ULL << 63) ||
- attr->sched_period & (1ULL << 63))
- return false;
-
- /* runtime <= deadline <= period (if period != 0) */
- if ((attr->sched_period != 0 &&
- attr->sched_period < attr->sched_deadline) ||
- attr->sched_deadline < attr->sched_runtime)
- return false;
-
- return true;
-}
-
/*
* Check the target process has a UID that matches the current process's:
*/
@@ -4170,19 +3969,6 @@ static bool check_same_owner(struct task_struct *p)
return match;
}
-static bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
-{
- struct sched_dl_entity *dl_se = &p->dl;
-
- if (dl_se->dl_runtime != attr->sched_runtime ||
- dl_se->dl_deadline != attr->sched_deadline ||
- dl_se->dl_period != attr->sched_period ||
- dl_se->flags != attr->sched_flags)
- return true;
-
- return false;
-}
-
static int __sched_setscheduler(struct task_struct *p,
const struct sched_attr *attr,
bool user, bool pi)
@@ -4197,8 +3983,8 @@ static int __sched_setscheduler(struct task_struct *p,
int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
struct rq *rq;
- /* May grab non-irq protected spin_locks: */
- BUG_ON(in_interrupt());
+ /* The pi code expects interrupts enabled */
+ BUG_ON(pi && in_interrupt());
recheck:
/* Double check policy once rq lock held: */
if (policy < 0) {
@@ -4211,7 +3997,8 @@ recheck:
return -EINVAL;
}
- if (attr->sched_flags & ~(SCHED_FLAG_RESET_ON_FORK))
+ if (attr->sched_flags &
+ ~(SCHED_FLAG_RESET_ON_FORK | SCHED_FLAG_RECLAIM))
return -EINVAL;
/*
@@ -4362,7 +4149,7 @@ change:
* of a SCHED_DEADLINE task) we need to check if enough bandwidth
* is available.
*/
- if ((dl_policy(policy) || dl_task(p)) && dl_overflow(p, policy, attr)) {
+ if ((dl_policy(policy) || dl_task(p)) && sched_dl_overflow(p, policy, attr)) {
task_rq_unlock(rq, p, &rf);
return -EBUSY;
}
@@ -5463,26 +5250,17 @@ void init_idle(struct task_struct *idle, int cpu)
#endif
}
+#ifdef CONFIG_SMP
+
int cpuset_cpumask_can_shrink(const struct cpumask *cur,
const struct cpumask *trial)
{
- int ret = 1, trial_cpus;
- struct dl_bw *cur_dl_b;
- unsigned long flags;
+ int ret = 1;
if (!cpumask_weight(cur))
return ret;
- rcu_read_lock_sched();
- cur_dl_b = dl_bw_of(cpumask_any(cur));
- trial_cpus = cpumask_weight(trial);
-
- raw_spin_lock_irqsave(&cur_dl_b->lock, flags);
- if (cur_dl_b->bw != -1 &&
- cur_dl_b->bw * trial_cpus < cur_dl_b->total_bw)
- ret = 0;
- raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags);
- rcu_read_unlock_sched();
+ ret = dl_cpuset_cpumask_can_shrink(cur, trial);
return ret;
}
@@ -5506,43 +5284,14 @@ int task_can_attach(struct task_struct *p,
goto out;
}
-#ifdef CONFIG_SMP
if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span,
- cs_cpus_allowed)) {
- unsigned int dest_cpu = cpumask_any_and(cpu_active_mask,
- cs_cpus_allowed);
- struct dl_bw *dl_b;
- bool overflow;
- int cpus;
- unsigned long flags;
-
- rcu_read_lock_sched();
- dl_b = dl_bw_of(dest_cpu);
- raw_spin_lock_irqsave(&dl_b->lock, flags);
- cpus = dl_bw_cpus(dest_cpu);
- overflow = __dl_overflow(dl_b, cpus, 0, p->dl.dl_bw);
- if (overflow)
- ret = -EBUSY;
- else {
- /*
- * We reserve space for this task in the destination
- * root_domain, as we can't fail after this point.
- * We will free resources in the source root_domain
- * later on (see set_cpus_allowed_dl()).
- */
- __dl_add(dl_b, p->dl.dl_bw);
- }
- raw_spin_unlock_irqrestore(&dl_b->lock, flags);
- rcu_read_unlock_sched();
+ cs_cpus_allowed))
+ ret = dl_task_can_attach(p, cs_cpus_allowed);
- }
-#endif
out:
return ret;
}
-#ifdef CONFIG_SMP
-
bool sched_smp_initialized __read_mostly;
#ifdef CONFIG_NUMA_BALANCING
@@ -5805,23 +5554,8 @@ static void cpuset_cpu_active(void)
static int cpuset_cpu_inactive(unsigned int cpu)
{
- unsigned long flags;
- struct dl_bw *dl_b;
- bool overflow;
- int cpus;
-
if (!cpuhp_tasks_frozen) {
- rcu_read_lock_sched();
- dl_b = dl_bw_of(cpu);
-
- raw_spin_lock_irqsave(&dl_b->lock, flags);
- cpus = dl_bw_cpus(cpu);
- overflow = __dl_overflow(dl_b, cpus, 0, 0);
- raw_spin_unlock_irqrestore(&dl_b->lock, flags);
-
- rcu_read_unlock_sched();
-
- if (overflow)
+ if (dl_cpu_busy(cpu))
return -EBUSY;
cpuset_update_active_cpus();
} else {
@@ -5952,7 +5686,6 @@ void __init sched_init_smp(void)
cpumask_var_t non_isolated_cpus;
alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
- alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
sched_init_numa();
@@ -5962,7 +5695,7 @@ void __init sched_init_smp(void)
* happen.
*/
mutex_lock(&sched_domains_mutex);
- init_sched_domains(cpu_active_mask);
+ sched_init_domains(cpu_active_mask);
cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
if (cpumask_empty(non_isolated_cpus))
cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
@@ -5978,7 +5711,6 @@ void __init sched_init_smp(void)
init_sched_dl_class();
sched_init_smt();
- sched_clock_init_late();
sched_smp_initialized = true;
}
@@ -5994,7 +5726,6 @@ early_initcall(migration_init);
void __init sched_init_smp(void)
{
sched_init_granularity();
- sched_clock_init_late();
}
#endif /* CONFIG_SMP */
@@ -6020,28 +5751,13 @@ static struct kmem_cache *task_group_cache __read_mostly;
DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
-#define WAIT_TABLE_BITS 8
-#define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS)
-static wait_queue_head_t bit_wait_table[WAIT_TABLE_SIZE] __cacheline_aligned;
-
-wait_queue_head_t *bit_waitqueue(void *word, int bit)
-{
- const int shift = BITS_PER_LONG == 32 ? 5 : 6;
- unsigned long val = (unsigned long)word << shift | bit;
-
- return bit_wait_table + hash_long(val, WAIT_TABLE_BITS);
-}
-EXPORT_SYMBOL(bit_waitqueue);
-
void __init sched_init(void)
{
int i, j;
unsigned long alloc_size = 0, ptr;
sched_clock_init();
-
- for (i = 0; i < WAIT_TABLE_SIZE; i++)
- init_waitqueue_head(bit_wait_table + i);
+ wait_bit_init();
#ifdef CONFIG_FAIR_GROUP_SCHED
alloc_size += 2 * nr_cpu_ids * sizeof(void **);
@@ -6193,7 +5909,6 @@ void __init sched_init(void)
calc_load_update = jiffies + LOAD_FREQ;
#ifdef CONFIG_SMP
- zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT);
/* May be allocated at isolcpus cmdline parse time */
if (cpu_isolated_map == NULL)
zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
@@ -6245,8 +5960,10 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
!is_idle_task(current)) ||
- system_state != SYSTEM_RUNNING || oops_in_progress)
+ system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING ||
+ oops_in_progress)
return;
+
if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
return;
prev_jiffy = jiffies;
@@ -6501,385 +6218,6 @@ void sched_move_task(struct task_struct *tsk)
task_rq_unlock(rq, tsk, &rf);
}
-#endif /* CONFIG_CGROUP_SCHED */
-
-#ifdef CONFIG_RT_GROUP_SCHED
-/*
- * Ensure that the real time constraints are schedulable.
- */
-static DEFINE_MUTEX(rt_constraints_mutex);
-
-/* Must be called with tasklist_lock held */
-static inline int tg_has_rt_tasks(struct task_group *tg)
-{
- struct task_struct *g, *p;
-
- /*
- * Autogroups do not have RT tasks; see autogroup_create().
- */
- if (task_group_is_autogroup(tg))
- return 0;
-
- for_each_process_thread(g, p) {
- if (rt_task(p) && task_group(p) == tg)
- return 1;
- }
-
- return 0;
-}
-
-struct rt_schedulable_data {
- struct task_group *tg;
- u64 rt_period;
- u64 rt_runtime;
-};
-
-static int tg_rt_schedulable(struct task_group *tg, void *data)
-{
- struct rt_schedulable_data *d = data;
- struct task_group *child;
- unsigned long total, sum = 0;
- u64 period, runtime;
-
- period = ktime_to_ns(tg->rt_bandwidth.rt_period);
- runtime = tg->rt_bandwidth.rt_runtime;
-
- if (tg == d->tg) {
- period = d->rt_period;
- runtime = d->rt_runtime;
- }
-
- /*
- * Cannot have more runtime than the period.
- */
- if (runtime > period && runtime != RUNTIME_INF)
- return -EINVAL;
-
- /*
- * Ensure we don't starve existing RT tasks.
- */
- if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
- return -EBUSY;
-
- total = to_ratio(period, runtime);
-
- /*
- * Nobody can have more than the global setting allows.
- */
- if (total > to_ratio(global_rt_period(), global_rt_runtime()))
- return -EINVAL;
-
- /*
- * The sum of our children's runtime should not exceed our own.
- */
- list_for_each_entry_rcu(child, &tg->children, siblings) {
- period = ktime_to_ns(child->rt_bandwidth.rt_period);
- runtime = child->rt_bandwidth.rt_runtime;
-
- if (child == d->tg) {
- period = d->rt_period;
- runtime = d->rt_runtime;
- }
-
- sum += to_ratio(period, runtime);
- }
-
- if (sum > total)
- return -EINVAL;
-
- return 0;
-}
-
-static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
-{
- int ret;
-
- struct rt_schedulable_data data = {
- .tg = tg,
- .rt_period = period,
- .rt_runtime = runtime,
- };
-
- rcu_read_lock();
- ret = walk_tg_tree(tg_rt_schedulable, tg_nop, &data);
- rcu_read_unlock();
-
- return ret;
-}
-
-static int tg_set_rt_bandwidth(struct task_group *tg,
- u64 rt_period, u64 rt_runtime)
-{
- int i, err = 0;
-
- /*
- * Disallowing the root group RT runtime is BAD, it would disallow the
- * kernel creating (and or operating) RT threads.
- */
- if (tg == &root_task_group && rt_runtime == 0)
- return -EINVAL;
-
- /* No period doesn't make any sense. */
- if (rt_period == 0)
- return -EINVAL;
-
- mutex_lock(&rt_constraints_mutex);
- read_lock(&tasklist_lock);
- err = __rt_schedulable(tg, rt_period, rt_runtime);
- if (err)
- goto unlock;
-
- raw_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
- tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
- tg->rt_bandwidth.rt_runtime = rt_runtime;
-
- for_each_possible_cpu(i) {
- struct rt_rq *rt_rq = tg->rt_rq[i];
-
- raw_spin_lock(&rt_rq->rt_runtime_lock);
- rt_rq->rt_runtime = rt_runtime;
- raw_spin_unlock(&rt_rq->rt_runtime_lock);
- }
- raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
-unlock:
- read_unlock(&tasklist_lock);
- mutex_unlock(&rt_constraints_mutex);
-
- return err;
-}
-
-static int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
-{
- u64 rt_runtime, rt_period;
-
- rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
- rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
- if (rt_runtime_us < 0)
- rt_runtime = RUNTIME_INF;
-
- return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
-}
-
-static long sched_group_rt_runtime(struct task_group *tg)
-{
- u64 rt_runtime_us;
-
- if (tg->rt_bandwidth.rt_runtime == RUNTIME_INF)
- return -1;
-
- rt_runtime_us = tg->rt_bandwidth.rt_runtime;
- do_div(rt_runtime_us, NSEC_PER_USEC);
- return rt_runtime_us;
-}
-
-static int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us)
-{
- u64 rt_runtime, rt_period;
-
- rt_period = rt_period_us * NSEC_PER_USEC;
- rt_runtime = tg->rt_bandwidth.rt_runtime;
-
- return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
-}
-
-static long sched_group_rt_period(struct task_group *tg)
-{
- u64 rt_period_us;
-
- rt_period_us = ktime_to_ns(tg->rt_bandwidth.rt_period);
- do_div(rt_period_us, NSEC_PER_USEC);
- return rt_period_us;
-}
-#endif /* CONFIG_RT_GROUP_SCHED */
-
-#ifdef CONFIG_RT_GROUP_SCHED
-static int sched_rt_global_constraints(void)
-{
- int ret = 0;
-
- mutex_lock(&rt_constraints_mutex);
- read_lock(&tasklist_lock);
- ret = __rt_schedulable(NULL, 0, 0);
- read_unlock(&tasklist_lock);
- mutex_unlock(&rt_constraints_mutex);
-
- return ret;
-}
-
-static int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
-{
- /* Don't accept realtime tasks when there is no way for them to run */
- if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
- return 0;
-
- return 1;
-}
-
-#else /* !CONFIG_RT_GROUP_SCHED */
-static int sched_rt_global_constraints(void)
-{
- unsigned long flags;
- int i;
-
- raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
- for_each_possible_cpu(i) {
- struct rt_rq *rt_rq = &cpu_rq(i)->rt;
-
- raw_spin_lock(&rt_rq->rt_runtime_lock);
- rt_rq->rt_runtime = global_rt_runtime();
- raw_spin_unlock(&rt_rq->rt_runtime_lock);
- }
- raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
-
- return 0;
-}
-#endif /* CONFIG_RT_GROUP_SCHED */
-
-static int sched_dl_global_validate(void)
-{
- u64 runtime = global_rt_runtime();
- u64 period = global_rt_period();
- u64 new_bw = to_ratio(period, runtime);
- struct dl_bw *dl_b;
- int cpu, ret = 0;
- unsigned long flags;
-
- /*
- * Here we want to check the bandwidth not being set to some
- * value smaller than the currently allocated bandwidth in
- * any of the root_domains.
- *
- * FIXME: Cycling on all the CPUs is overdoing, but simpler than
- * cycling on root_domains... Discussion on different/better
- * solutions is welcome!
- */
- for_each_possible_cpu(cpu) {
- rcu_read_lock_sched();
- dl_b = dl_bw_of(cpu);
-
- raw_spin_lock_irqsave(&dl_b->lock, flags);
- if (new_bw < dl_b->total_bw)
- ret = -EBUSY;
- raw_spin_unlock_irqrestore(&dl_b->lock, flags);
-
- rcu_read_unlock_sched();
-
- if (ret)
- break;
- }
-
- return ret;
-}
-
-static void sched_dl_do_global(void)
-{
- u64 new_bw = -1;
- struct dl_bw *dl_b;
- int cpu;
- unsigned long flags;
-
- def_dl_bandwidth.dl_period = global_rt_period();
- def_dl_bandwidth.dl_runtime = global_rt_runtime();
-
- if (global_rt_runtime() != RUNTIME_INF)
- new_bw = to_ratio(global_rt_period(), global_rt_runtime());
-
- /*
- * FIXME: As above...
- */
- for_each_possible_cpu(cpu) {
- rcu_read_lock_sched();
- dl_b = dl_bw_of(cpu);
-
- raw_spin_lock_irqsave(&dl_b->lock, flags);
- dl_b->bw = new_bw;
- raw_spin_unlock_irqrestore(&dl_b->lock, flags);
-
- rcu_read_unlock_sched();
- }
-}
-
-static int sched_rt_global_validate(void)
-{
- if (sysctl_sched_rt_period <= 0)
- return -EINVAL;
-
- if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
- (sysctl_sched_rt_runtime > sysctl_sched_rt_period))
- return -EINVAL;
-
- return 0;
-}
-
-static void sched_rt_do_global(void)
-{
- def_rt_bandwidth.rt_runtime = global_rt_runtime();
- def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
-}
-
-int sched_rt_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
-{
- int old_period, old_runtime;
- static DEFINE_MUTEX(mutex);
- int ret;
-
- mutex_lock(&mutex);
- old_period = sysctl_sched_rt_period;
- old_runtime = sysctl_sched_rt_runtime;
-
- ret = proc_dointvec(table, write, buffer, lenp, ppos);
-
- if (!ret && write) {
- ret = sched_rt_global_validate();
- if (ret)
- goto undo;
-
- ret = sched_dl_global_validate();
- if (ret)
- goto undo;
-
- ret = sched_rt_global_constraints();
- if (ret)
- goto undo;
-
- sched_rt_do_global();
- sched_dl_do_global();
- }
- if (0) {
-undo:
- sysctl_sched_rt_period = old_period;
- sysctl_sched_rt_runtime = old_runtime;
- }
- mutex_unlock(&mutex);
-
- return ret;
-}
-
-int sched_rr_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
-{
- int ret;
- static DEFINE_MUTEX(mutex);
-
- mutex_lock(&mutex);
- ret = proc_dointvec(table, write, buffer, lenp, ppos);
- /*
- * Make sure that internally we keep jiffies.
- * Also, writing zero resets the timeslice to default:
- */
- if (!ret && write) {
- sched_rr_timeslice =
- sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE :
- msecs_to_jiffies(sysctl_sched_rr_timeslice);
- }
- mutex_unlock(&mutex);
- return ret;
-}
-
-#ifdef CONFIG_CGROUP_SCHED
static inline struct task_group *css_tg(struct cgroup_subsys_state *css)
{