diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 68b21395a743..3f4b9c6911be 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -1088,6 +1088,10 @@ Model available). If your platform happens to meet the
requirements for EAS but you do not want to use it, change
this value to 0.

+sched_interactivity_factor (CacULE scheduler only)
+==================================================
+Sets the value *m* for interactivity score calculations. See
+Figure 1 in https://web.cs.ucdavis.edu/~roper/ecs150/ULE.pdf
+
sched_schedstats
================
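
The tunable documented above lands under /proc/sys/kernel/ via the sysctl table at the end of this patch. A minimal userspace sketch for reading it, assuming the patch is applied and CONFIG_CACULE_SCHED is enabled (the "maximum score = 2*m" relation comes from the sched-CacULE.rst documentation below):

#include <stdio.h>

int main(void)
{
    FILE *f = fopen("/proc/sys/kernel/sched_interactivity_factor", "r");
    unsigned int m;

    if (!f) {
        perror("fopen");
        return 1;
    }
    if (fscanf(f, "%u", &m) == 1)
        printf("m = %u, maximum interactivity score = %u\n", m, 2 * m);
    fclose(f);
    return 0;
}
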
diff --git a/Documentation/scheduler/sched-CacULE.rst b/Documentation/scheduler/sched-CacULE.rst
new file mode 100644
index 000000000000..82b0847c468a
--- /dev/null
+++ b/Documentation/scheduler/sched-CacULE.rst
@@ -0,0 +1,76 @@
+=======================================
+The CacULE Scheduler by Hamad Al Marri.
+=======================================
+
+1. Overview
+=============
+
+The CacULE CPU scheduler is based on the interactivity score mechanism.
+The interactivity score is inspired by the ULE scheduler (FreeBSD
+scheduler).
+
+1.1 About CacULE Scheduler
+--------------------------
+
+ - Each CPU has its own runqueue.
+
+ - The NORMAL runqueue is a linked list of sched_entities (instead of an RB-Tree).
+
+ - RT and other runqueues are just the same as in CFS.
+
+ - A woken task preempts the currently running task if it has a better
+   interactivity score.
+
+
+1.2. Complexity
+----------------
+
+The complexity of enqueueing and dequeueing a task is O(1).
+
+The complexity of picking the next task is O(n), where n is the number of tasks
+in a runqueue (each CPU has its own runqueue).
+
+Note: O(n) sounds scary, but on a typical machine with 4 CPUs that is used
+for desktop or mobile jobs, the maximum number of runnable tasks might not
+exceed 10 (at pick-next time) - sleeping tasks are excluded, since they
+are dequeued when they sleep and enqueued when they wake up.
+
+
+2. The CacULE Interactivity Score
+=======================================================
+
+The interactivity score is inspired by the ULE scheduler (FreeBSD scheduler).
+For more information see: https://web.cs.ucdavis.edu/~roper/ecs150/ULE.pdf
+CacULE doesn't replace CFS with ULE; it only changes CFS's pick-next-task
+mechanism to ULE's interactivity score mechanism for picking the next task to run.
+
+
+2.3 sched_interactivity_factor
+------------------------------
+Sets the value *m* for interactivity score calculations. See Figure 1 in
+https://web.cs.ucdavis.edu/~roper/ecs150/ULE.pdf
+The default value in CacULE is 10, which means that the maximum interactive
+score is 20 (since m = maximum interactive score / 2).
+You can tune sched_interactivity_factor with the sysctl command:
+
+ sysctl kernel.sched_interactivity_factor=50
+
+This command changes the sched_interactivity_factor from 10 to 50.
+
+
+3. Scheduling policies
+=======================
+
+CacULE, same as CFS, implements three scheduling policies:
+
+ - SCHED_NORMAL (traditionally called SCHED_OTHER): The scheduling
+   policy that is used for regular tasks.
+
+ - SCHED_BATCH: Does not preempt nearly as often as regular tasks
+   would, thereby allowing tasks to run longer and make better use of
+   caches but at the cost of interactivity. This is well suited for
+   batch jobs.
+
+ - SCHED_IDLE: This is even weaker than nice 19, but it's not a true
+   idle timer scheduler, in order to avoid getting into priority
+   inversion problems which would deadlock the machine.
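
To make the score mechanism above concrete, here is a standalone userspace model of the calculation - my reading of Figure 1 in the ULE paper and of calc_interactivity() later in this patch; it is an illustration, not the kernel code. Lower scores mean more interactive:

#include <stdint.h>
#include <stdio.h>

static unsigned int score(uint64_t sleep_ns, uint64_t run_ns, uint64_t m)
{
    sleep_ns |= 1;              /* avoid division by zero */
    run_ns |= 1;

    if (sleep_ns >= run_ns)     /* mostly sleeping: score in [0, m] */
        return m / (sleep_ns / run_ns);
    /* mostly running: score in (m, 2m) */
    return 2 * m - (m / (run_ns / sleep_ns));
}

int main(void)
{
    uint64_t m = 10;    /* default factor per the docs above */

    printf("%u\n", score(9000, 1000, m));  /* sleeper  -> 1  */
    printf("%u\n", score(1000, 1000, m));  /* balanced -> 10 */
    printf("%u\n", score(1000, 9000, m));  /* CPU hog  -> 19 */
    return 0;
}
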
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d2c881384517..0be8e440d720 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -450,10 +450,22 @@ struct sched_statistics {
#endif
};

+#ifdef CONFIG_CACULE_SCHED
+struct cacule_node {
+    struct cacule_node* next;
+    struct cacule_node* prev;
+    u64 cacule_start_time;
+    u64 vruntime;
+};
+#endif
+
struct sched_entity {
    /* For load-balancing: */
    struct load_weight load;
    struct rb_node run_node;
+#ifdef CONFIG_CACULE_SCHED
+    struct cacule_node cacule_node;
+#endif
    struct list_head group_node;
    unsigned int on_rq;

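
The cacule_node added above is an intrusive list node: fair.c later recovers the enclosing sched_entity with container_of() (its se_of() helper). A self-contained illustration of that pattern in plain C - not the kernel code itself:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct cacule_node { struct cacule_node *next, *prev; };
struct sched_entity { int dummy; struct cacule_node cacule_node; };

int main(void)
{
    struct sched_entity se = { .dummy = 42 };
    struct cacule_node *cn = &se.cacule_node;
    /* pointer arithmetic on the member offset recovers the container */
    struct sched_entity *back = container_of(cn, struct sched_entity, cacule_node);

    printf("%d\n", back->dummy);   /* prints 42 */
    return 0;
}
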
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index db2c0f34aaaf..5a66fc5826fc 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -32,6 +32,12 @@ extern unsigned int sysctl_sched_latency;
extern unsigned int sysctl_sched_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity;

+#ifdef CONFIG_CACULE_SCHED
+extern unsigned int interactivity_factor;
+extern unsigned int interactivity_threshold;
+extern unsigned int cacule_max_lifetime;
+#endif
+
enum sched_tunable_scaling {
    SCHED_TUNABLESCALING_NONE,
    SCHED_TUNABLESCALING_LOG,
diff --git a/init/Kconfig b/init/Kconfig
index a61c92066c2e..089e3bfe5dbc 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -834,6 +834,17 @@ config UCLAMP_BUCKETS_COUNT

endmenu

+config CACULE_SCHED
+    bool "CacULE CPU scheduler"
+    default y
+    help
+      The CacULE CPU scheduler is based on the interactivity score
+      mechanism. The interactivity score is inspired by the ULE scheduler
+      (FreeBSD scheduler).
+
+      If unsure, say Y here.
+
+
#
# For architectures that want to enable the support for NUMA-affine scheduler
# balancing logic:
@@ -1231,6 +1242,7 @@ config SCHED_AUTOGROUP
    select CGROUPS
    select CGROUP_SCHED
    select FAIR_GROUP_SCHED
+    default y
    help
      This option optimizes the scheduler for common desktop workloads by
      automatically creating and populating task groups. This separation
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 38ef6d06888e..865f8dbddca8 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -46,6 +46,9 @@ choice
    1000 Hz is the preferred choice for desktop systems and other
    systems requiring fast interactive responses to events.

+    config HZ_2000
+        bool "2000 HZ"
+
endchoice

config HZ
@@ -54,6 +57,7 @@ config HZ
    default 250 if HZ_250
    default 300 if HZ_300
    default 1000 if HZ_1000
+    default 2000 if HZ_2000

config SCHED_HRTICK
    def_bool HIGH_RES_TIMERS
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5226cc26a095..8505beec4e99 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3574,6 +3574,11 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
    p->se.prev_sum_exec_runtime = 0;
    p->se.nr_migrations = 0;
    p->se.vruntime = 0;
+
+#ifdef CONFIG_CACULE_SCHED
+    p->se.cacule_node.vruntime = 0;
+#endif
+
    INIT_LIST_HEAD(&p->se.group_node);

#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -3859,6 +3864,10 @@ void wake_up_new_task(struct task_struct *p)
    update_rq_clock(rq);
    post_init_entity_util_avg(p);

+#ifdef CONFIG_CACULE_SCHED
+    p->se.cacule_node.cacule_start_time = sched_clock();
+#endif
+
    activate_task(rq, p, ENQUEUE_NOCLOCK);
    trace_sched_wakeup_new(p);
    check_preempt_curr(rq, p, WF_FORK);
@@ -8103,6 +8112,10 @@ void __init sched_init(void)
    BUG_ON(&dl_sched_class + 1 != &stop_sched_class);
#endif

+#ifdef CONFIG_CACULE_SCHED
+    printk(KERN_INFO "CacULE CPU scheduler v5.13 by Hamad Al Marri.\n");
+#endif
+
    wait_bit_init();

#ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 9c882f20803e..1af3163f5b73 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -560,8 +560,11 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)

void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
{
-    s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
-        spread, rq0_min_vruntime, spread0;
+    s64 MIN_vruntime = -1, max_vruntime = -1,
+#if !defined(CONFIG_CACULE_SCHED)
+        min_vruntime, rq0_min_vruntime, spread0,
+#endif
+        spread;
    struct rq *rq = cpu_rq(cpu);
    struct sched_entity *last;
    unsigned long flags;
@@ -582,21 +585,27 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
    last = __pick_last_entity(cfs_rq);
    if (last)
        max_vruntime = last->vruntime;
+#if !defined(CONFIG_CACULE_SCHED)
    min_vruntime = cfs_rq->min_vruntime;
    rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
+#endif
    raw_spin_unlock_irqrestore(&rq->lock, flags);
    SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "MIN_vruntime",
            SPLIT_NS(MIN_vruntime));
+#if !defined(CONFIG_CACULE_SCHED)
    SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "min_vruntime",
            SPLIT_NS(min_vruntime));
+#endif
    SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "max_vruntime",
            SPLIT_NS(max_vruntime));
    spread = max_vruntime - MIN_vruntime;
    SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread",
            SPLIT_NS(spread));
+#if !defined(CONFIG_CACULE_SCHED)
    spread0 = min_vruntime - rq0_min_vruntime;
    SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread0",
            SPLIT_NS(spread0));
+#endif
    SEQ_printf(m, "  .%-30s: %d\n", "nr_spread_over",
            cfs_rq->nr_spread_over);
    SEQ_printf(m, "  .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3248e24a90b0..5bc521731064 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -19,9 +19,25 @@
 *
 * Adaptive scheduling granularity, math enhancements by Peter Zijlstra
 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
+ *
+ * CacULE enhancements: CPU-cache-aware scheduling based on the
+ * Interactivity Score.
+ * (C) 2020 Hamad Al Marri <hamad.s.almarri@gmail.com>
 */
#include "sched.h"

+#ifdef CONFIG_CACULE_SCHED
+unsigned int __read_mostly cacule_max_lifetime = 22000; // in ms
+unsigned int __read_mostly interactivity_factor = 32768;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+unsigned int __read_mostly interactivity_threshold = 0;
+#else
+unsigned int __read_mostly interactivity_threshold = 1000;
+#endif
+
+#endif
+
/*
 * Targeted preemption latency for CPU-bound tasks:
 *
@@ -82,7 +98,11 @@ unsigned int sysctl_sched_child_runs_first __read_mostly;
unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;

+#ifdef CONFIG_CACULE_SCHED
+const_debug unsigned int sysctl_sched_migration_cost = 200000UL;
+#else
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
+#endif

int sched_thermal_decay_shift;
static int __init setup_sched_thermal_decay_shift(char *str)
@@ -263,6 +283,14 @@ static u64 __calc_delta(u64 delta_exec, unsigned long weight, struct load_weight

const struct sched_class fair_sched_class;

+
+#ifdef CONFIG_CACULE_SCHED
+static inline struct sched_entity *se_of(struct cacule_node *cn)
+{
+    return container_of(cn, struct sched_entity, cacule_node);
+}
+#endif
+
/**************************************************************
 * CFS operations on generic schedulable entities:
 */
@@ -522,7 +550,7 @@ void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec);
/**************************************************************
 * Scheduling class tree data structure manipulation methods:
 */
-
+#if !defined(CONFIG_CACULE_SCHED)
static inline u64 max_vruntime(u64 max_vruntime, u64 vruntime)
{
    s64 delta = (s64)(vruntime - max_vruntime);
@@ -585,7 +613,170 @@ static inline bool __entity_less(struct rb_node *a, const struct rb_node *b)
{
    return entity_before(__node_2_se(a), __node_2_se(b));
}
+#endif /* CONFIG_CACULE_SCHED */
+
+#ifdef CONFIG_CACULE_SCHED
+static unsigned int
+calc_interactivity(u64 now, struct cacule_node *se)
+{
+    u64 l_se, vr_se, sleep_se = 1ULL, u64_factor_m, _2m;
+    unsigned int score_se;
+
+    /*
+     * in case of vruntime==0, a bitwise OR with 1 makes
+     * sure that the least significant bit is 1
+     */
+    l_se = now - se->cacule_start_time;
+    vr_se = se->vruntime | 1;
+    u64_factor_m = interactivity_factor;
+    _2m = u64_factor_m << 1;
+
+    /* safety check */
+    if (likely(l_se > vr_se))
+        sleep_se = (l_se - vr_se) | 1;
+
+    if (sleep_se >= vr_se)
+        score_se = u64_factor_m / (sleep_se / vr_se);
+    else
+        score_se = _2m - (u64_factor_m / (vr_se / sleep_se));
+
+    return score_se;
+}
+
+static inline int is_interactive(struct cacule_node *cn)
+{
+    if (!interactivity_threshold || se_of(cn)->vruntime == 0)
+        return 0;
+
+    return calc_interactivity(sched_clock(), cn) < interactivity_threshold;
+}
+
+static inline int
+entity_before_cached(u64 now, unsigned int score_curr, struct cacule_node *se)
+{
+    unsigned int score_se;
+    int diff;
+
+    score_se = calc_interactivity(now, se);
+    diff = score_se - score_curr;
+
+    if (diff <= 0)
+        return 1;
+
+    return -1;
+}
+
+/*
+ * Does se have a lower interactivity score (i.e. is it more interactive)
+ * than curr? If yes, return 1, otherwise return -1.
+ * se is before curr if se has the lower interactivity score;
+ * the lower the score, the more interactive.
+ */
+static inline int
+entity_before(u64 now, struct cacule_node *curr, struct cacule_node *se)
+{
+    unsigned int score_curr, score_se;
+    int diff;
+
+    score_curr = calc_interactivity(now, curr);
+    score_se = calc_interactivity(now, se);
+
+    diff = score_se - score_curr;
+
+    if (diff < 0)
+        return 1;
+
+    return -1;
+}
+
+/*
+ * Enqueue an entity
+ */
+static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *_se)
+{
+    struct cacule_node *se = &(_se->cacule_node);
+    struct cacule_node *iter, *next = NULL;
+    u64 now = sched_clock();
+    unsigned int score_se = calc_interactivity(now, se);
+
+    se->next = NULL;
+    se->prev = NULL;
+
+    if (likely(cfs_rq->head)) {
+
+        // start from the tail
+        iter = cfs_rq->tail;
+
+        // walk backwards while se has a better (lower) score than iter
+        while (iter && entity_before_cached(now, score_se, iter) == -1) {
+            next = iter;
+            iter = iter->prev;
+        }
+
+        // se stays in the tail position
+        if (iter == cfs_rq->tail) {
+            cfs_rq->tail->next = se;
+            se->prev = cfs_rq->tail;
+
+            cfs_rq->tail = se;
+        }
+        // neither head nor tail: insert se between iter and next
+        else if (iter) {
+            se->next = next;
+            se->prev = iter;
+
+            iter->next = se;
+            next->prev = se;
+        }
+        // insert se at the head
+        else {
+            se->next = cfs_rq->head;
+            cfs_rq->head->prev = se;
+
+            // lastly reset the head
+            cfs_rq->head = se;
+        }
+    } else {
+        // the runqueue is empty
+        cfs_rq->head = se;
+        cfs_rq->tail = se;
+    }
+}
+
+static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *_se)
+{
+    struct cacule_node *se = &(_se->cacule_node);
+
+    // if only one se is in the rq
+    if (cfs_rq->head == cfs_rq->tail) {
+        cfs_rq->head = NULL;
+        cfs_rq->tail = NULL;
+
+    } else if (se == cfs_rq->head) {
+        // if it is the head
+        cfs_rq->head = cfs_rq->head->next;
+        cfs_rq->head->prev = NULL;
+    } else if (se == cfs_rq->tail) {
+        // if it is the tail
+        cfs_rq->tail = cfs_rq->tail->prev;
+        cfs_rq->tail->next = NULL;
+    } else {
+        // if it is in the middle
+        struct cacule_node *prev = se->prev;
+        struct cacule_node *next = se->next;
+
+        prev->next = next;
+
+        if (next)
+            next->prev = prev;
+    }
+}
+
+struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
+{
+    return se_of(cfs_rq->head);
+}
+#else
/*
 * Enqueue an entity into the rb-tree:
 */
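
A toy model of the __enqueue_entity() scan above (illustration only, with simplified types): start at the tail and walk toward the head while the new node has the better (lower) score, then splice it in at one of three positions:

struct node { int score; struct node *prev, *next; };
struct list { struct node *head, *tail; };

static void insert_sorted(struct list *q, struct node *n)
{
    struct node *iter = q->tail, *next = 0;

    n->prev = n->next = 0;
    if (!q->head) {                     /* empty queue */
        q->head = q->tail = n;
        return;
    }
    while (iter && n->score < iter->score) {    /* lower score wins */
        next = iter;
        iter = iter->prev;
    }
    if (iter == q->tail) {              /* loop never ran: append at tail */
        iter->next = n;
        n->prev = iter;
        q->tail = n;
    } else if (iter) {                  /* middle: between iter and next */
        n->prev = iter;
        n->next = next;
        iter->next = n;
        next->prev = n;
    } else {                            /* beats everything: new head */
        n->next = q->head;
        q->head->prev = n;
        q->head = n;
    }
}
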
@@ -618,16 +809,24 @@ static struct sched_entity *__pick_next_entity(struct sched_entity *se)

    return __node_2_se(next);
}
+#endif /* CONFIG_CACULE_SCHED */

#ifdef CONFIG_SCHED_DEBUG
struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
{
+#ifdef CONFIG_CACULE_SCHED
+    if (!cfs_rq->tail)
+        return NULL;
+
+    return se_of(cfs_rq->tail);
+#else
    struct rb_node *last = rb_last(&cfs_rq->tasks_timeline.rb_root);

    if (!last)
        return NULL;

    return __node_2_se(last);
+#endif /* CONFIG_CACULE_SCHED */
}

/**************************************************************
@@ -717,6 +916,7 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
    return slice;
}

+#if !defined(CONFIG_CACULE_SCHED)
/*
 * We calculate the vruntime slice of a to-be-inserted task.
 *
@@ -726,6 +926,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
    return calc_delta_fair(sched_slice(cfs_rq, se), se);
}
+#endif /* CONFIG_CACULE_SCHED */

#include "pelt.h"
#ifdef CONFIG_SMP
@@ -833,14 +1034,51 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq)
}
#endif /* CONFIG_SMP */

+#ifdef CONFIG_CACULE_SCHED
+static void normalize_lifetime(u64 now, struct sched_entity *se)
+{
+    struct cacule_node *cn = &se->cacule_node;
+    u64 max_life_ns, life_time;
+    s64 diff;
+
+    /*
+     * left shifting by 20 bits approximates multiplying by 1000000;
+     * we don't need full precision for the life time.
+     * Ex. for 30s, the left shift (20 bits) gives 31.457s.
+     */
+    max_life_ns = ((u64) cacule_max_lifetime) << 20;
+    life_time = now - cn->cacule_start_time;
+    diff = life_time - max_life_ns;
+
+    if (diff > 0) {
+        // multiply life_time by 1024 for more precision
+        u64 old_hrrn_x = (life_time << 7) / ((cn->vruntime >> 3) | 1);
+
+        // reset life to half max_life (~11.5s with the 22s default)
+        cn->cacule_start_time = now - (max_life_ns >> 1);
+
+        // avoid division by zero
+        if (old_hrrn_x == 0) old_hrrn_x = 1;
+
+        // reset vruntime based on the old hrrn ratio
+        cn->vruntime = (max_life_ns << 9) / old_hrrn_x;
+    }
+}
+#endif /* CONFIG_CACULE_SCHED */
+
/*
 * Update the current task's runtime statistics.
 */
static void update_curr(struct cfs_rq *cfs_rq)
{
    struct sched_entity *curr = cfs_rq->curr;
+#ifdef CONFIG_CACULE_SCHED
+    u64 now = sched_clock();
+    u64 delta_exec, delta_fair;
+#else
    u64 now = rq_clock_task(rq_of(cfs_rq));
    u64 delta_exec;
+#endif

    if (unlikely(!curr))
        return;
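
A worked example of the normalize_lifetime() arithmetic above, with made-up numbers (a userspace sketch of my reading of the shifts, not kernel code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t max_life_ns = (uint64_t)22000 << 20;   /* ~23.07 s, not 22 s */
    uint64_t life_ns  = 30000000000ULL;             /* task alive for 30 s */
    uint64_t vruntime = 3000000000ULL;              /* 3 s of fair runtime */

    /* ~1024 * life / vruntime, as in the kernel comment */
    uint64_t old_hrrn_x = (life_ns << 7) / ((vruntime >> 3) | 1);

    /* vruntime rescaled so the life:vruntime ratio is preserved at
     * the new (halved) lifetime: (max_life_ns << 9) / old_hrrn_x */
    uint64_t new_vr = (max_life_ns << 9) / old_hrrn_x;

    printf("old ratio ~%llu:1, new vruntime ~%.2f s\n",
           (unsigned long long)(life_ns / vruntime),
           new_vr / 1e9);   /* ~10:1 and ~1.15 s, i.e. (23.07/2)/10 */
    return 0;
}
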
@@ -857,8 +1095,15 @@ static void update_curr(struct cfs_rq *cfs_rq)
    curr->sum_exec_runtime += delta_exec;
    schedstat_add(cfs_rq->exec_clock, delta_exec);

+#ifdef CONFIG_CACULE_SCHED
+    delta_fair = calc_delta_fair(delta_exec, curr);
+    curr->vruntime += delta_fair;
+    curr->cacule_node.vruntime += delta_fair;
+    normalize_lifetime(now, curr);
+#else
    curr->vruntime += calc_delta_fair(delta_exec, curr);
    update_min_vruntime(cfs_rq);
+#endif

    if (entity_is_task(curr)) {
        struct task_struct *curtask = task_of(curr);
@@ -1026,7 +1271,6 @@ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
static inline void
update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
-
    if (!schedstat_enabled())
        return;

@@ -1058,7 +1302,11 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
    /*
     * We are starting a new run period:
     */
+#ifdef CONFIG_CACULE_SCHED
+    se->exec_start = sched_clock();
+#else
    se->exec_start = rq_clock_task(rq_of(cfs_rq));
+#endif
}

/**************************************************
@@ -4121,7 +4369,7 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {}

static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
-#ifdef CONFIG_SCHED_DEBUG
+#if defined(CONFIG_SCHED_DEBUG) && !defined(CONFIG_CACULE_SCHED)
    s64 d = se->vruntime - cfs_rq->min_vruntime;

    if (d < 0)
@@ -4132,6 +4380,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
#endif
}

+#if !defined(CONFIG_CACULE_SCHED)
static void
place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
{
@@ -4163,6 +4412,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
    /* ensure we never gain time by being placed backwards. */
    se->vruntime = max_vruntime(se->vruntime, vruntime);
}
+#endif /* CONFIG_CACULE_SCHED */

static void check_enqueue_throttle(struct cfs_rq *cfs_rq);

@@ -4221,18 +4471,23 @@ static inline bool cfs_bandwidth_used(void);
static void
enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
+#if !defined(CONFIG_CACULE_SCHED)
    bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED);
+#endif
    bool curr = cfs_rq->curr == se;

+#if !defined(CONFIG_CACULE_SCHED)
    /*
     * If we're the current task, we must renormalise before calling
     * update_curr().
     */
    if (renorm && curr)
        se->vruntime += cfs_rq->min_vruntime;
+#endif

    update_curr(cfs_rq);

+#if !defined(CONFIG_CACULE_SCHED)
    /*
     * Otherwise, renormalise after, such that we're placed at the current
     * moment in time, instead of some random moment in the past. Being
@@ -4241,6 +4496,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
     */
    if (renorm && !curr)
        se->vruntime += cfs_rq->min_vruntime;
+#endif

    /*
     * When enqueuing a sched_entity, we must:
@@ -4255,8 +4511,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
    update_cfs_group(se);
    account_entity_enqueue(cfs_rq, se);

+#if !defined(CONFIG_CACULE_SCHED)
    if (flags & ENQUEUE_WAKEUP)
        place_entity(cfs_rq, se, 0);
+#endif

    check_schedstat_required();
    update_stats_enqueue(cfs_rq, se, flags);
@@ -4277,6 +4535,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
        check_enqueue_throttle(cfs_rq);
}

+#if !defined(CONFIG_CACULE_SCHED)
static void __clear_buddies_last(struct sched_entity *se)
{
    for_each_sched_entity(se) {
@@ -4321,6 +4580,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
    if (cfs_rq->skip == se)
        __clear_buddies_skip(se);
}
+#endif /* !CONFIG_CACULE_SCHED */

static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);

@@ -4345,13 +4605,16 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)

    update_stats_dequeue(cfs_rq, se, flags);

+#if !defined(CONFIG_CACULE_SCHED)
    clear_buddies(cfs_rq, se);
+#endif

    if (se != cfs_rq->curr)
        __dequeue_entity(cfs_rq, se);
    se->on_rq = 0;
    account_entity_dequeue(cfs_rq, se);

+#if !defined(CONFIG_CACULE_SCHED)
    /*
     * Normalize after update_curr(); which will also have moved
     * min_vruntime if @se is the one holding it back. But before doing
@@ -4360,12 +4623,14 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
     */
    if (!(flags & DEQUEUE_SLEEP))
        se->vruntime -= cfs_rq->min_vruntime;
+#endif

    /* return excess runtime on last dequeue */
    return_cfs_rq_runtime(cfs_rq);

    update_cfs_group(se);

+#if !defined(CONFIG_CACULE_SCHED)
    /*
     * Now advance min_vruntime if @se was the entity holding it back,
     * except when: DEQUEUE_SAVE && !DEQUEUE_MOVE, in this case we'll be
@@ -4374,8 +4639,21 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
     */
    if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) != DEQUEUE_SAVE)
        update_min_vruntime(cfs_rq);
+#endif
}

+#ifdef CONFIG_CACULE_SCHED
+/*
+ * Preempt the current task with a newly woken task if needed:
+ */
+static void
+check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+{
+    // resched if the head is more interactive (has a lower score) than curr
+    if (entity_before(sched_clock(), &curr->cacule_node, cfs_rq->head) == 1)
+        resched_curr(rq_of(cfs_rq));
+}
+#else
/*
 * Preempt the current task with a newly woken task if needed:
 */
@@ -4415,6 +4693,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
    if (delta > ideal_runtime)
        resched_curr(rq_of(cfs_rq));
}
+#endif /* CONFIG_CACULE_SCHED */

static void
set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -4449,6 +4728,21 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
    se->prev_sum_exec_runtime = se->sum_exec_runtime;
}

+#ifdef CONFIG_CACULE_SCHED
+static struct sched_entity *
+pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+{
+    struct cacule_node *se = cfs_rq->head;
+
+    if (unlikely(!se))
+        se = &curr->cacule_node;
+    else if (unlikely(curr
+             && entity_before(sched_clock(), se, &curr->cacule_node) == 1))
+        se = &curr->cacule_node;
+
+    return se_of(se);
+}
+#else
static int
wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);

@@ -4509,6 +4803,7 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)

    return se;
}
+#endif /* CONFIG_CACULE_SCHED */

static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq);

@@ -5611,7 +5906,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
    hrtick_update(rq);
}

+#if !defined(CONFIG_CACULE_SCHED)
static void set_next_buddy(struct sched_entity *se);
+#endif

/*
 * The dequeue_task method is called before nr_running is
@@ -5643,12 +5940,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
        if (cfs_rq->load.weight) {
            /* Avoid re-evaluating load for this entity: */
            se = parent_entity(se);
+#if !defined(CONFIG_CACULE_SCHED)
            /*
             * Bias pick_next to pick a task from this cfs_rq, as
             * p is sleeping when it is within its sched_slice.
             */
            if (task_sleep && se && !throttled_hierarchy(cfs_rq))
                set_next_buddy(se);
+#endif
            break;
        }
        flags |= DEQUEUE_SLEEP;
@@ -5764,6 +6063,7 @@ static unsigned long capacity_of(int cpu)
    return cpu_rq(cpu)->cpu_capacity;
}

+#if !defined(CONFIG_CACULE_SCHED)
static void record_wakee(struct task_struct *p)
{
    /*
@@ -5810,6 +6110,7 @@ static int wake_wide(struct task_struct *p)
        return 0;
    return 1;
}
+#endif /* CONFIG_CACULE_SCHED */

/*
 * The purpose of wake_affine() is to quickly determine on which CPU we can run
@@ -6512,6 +6813,7 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p)
    return min_t(unsigned long, util, capacity_orig_of(cpu));
}

+#if !defined(CONFIG_CACULE_SCHED)
/*
 * Predicts what cpu_util(@cpu) would return if @p was migrated (and enqueued)
 * to @dst_cpu.
@@ -6761,6 +7063,57 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)

    return -1;
}
+#endif /* CONFIG_CACULE_SCHED */
+
+#ifdef CONFIG_CACULE_SCHED
+static int
+find_least_IS_cpu(struct task_struct *p)
+{
+    struct cfs_rq *cfs_rq;
+    unsigned int max_IS = 0;
+    unsigned int IS, IS_c, IS_h;
+    struct sched_entity *curr_se;
+    struct cacule_node *cn, *head;
+    int cpu_i;
+    int new_cpu = -1;
+
+    for_each_online_cpu(cpu_i) {
+        if (!cpumask_test_cpu(cpu_i, p->cpus_ptr))
+            continue;
+
+        cn = NULL;
+        cfs_rq = &cpu_rq(cpu_i)->cfs;
+
+        curr_se = cfs_rq->curr;
+        head = cfs_rq->head;
+
+        if (!curr_se && head)
+            cn = head;
+        else if (curr_se && !head)
+            cn = &curr_se->cacule_node;
+        else if (curr_se && head) {
+            IS_c = calc_interactivity(sched_clock(), &curr_se->cacule_node);
+            IS_h = calc_interactivity(sched_clock(), head);
+
+            IS = IS_c > IS_h ? IS_c : IS_h;
+            goto compare;
+        }
+
+        if (!cn)
+            return cpu_i;
+
+        IS = calc_interactivity(sched_clock(), cn);
+
+compare:
+        if (IS > max_IS) {
+            max_IS = IS;
+            new_cpu = cpu_i;
+        }
+    }
+
+    return new_cpu;
+}
+#endif

/*
 * select_task_rq_fair: Select target runqueue for the waking task in domains
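
The shape of the find_least_IS_cpu() policy above, reduced to a plain array scan (illustration only): an interactive waker is placed on the CPU whose current claimant has the highest score, i.e. is least interactive:

#include <stdio.h>

static int pick_cpu(const unsigned int score[], int ncpu)
{
    unsigned int max_is = 0;
    int cpu, best = -1;

    for (cpu = 0; cpu < ncpu; cpu++) {
        if (score[cpu] > max_is) {      /* higher score = less interactive */
            max_is = score[cpu];
            best = cpu;
        }
    }
    return best;                        /* -1 if every score was 0 */
}

int main(void)
{
    unsigned int score[4] = { 3, 19, 7, 12 };

    printf("CPU %d\n", pick_cpu(score, 4));   /* prints "CPU 1" */
    return 0;
}
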
@@ -6785,6 +7138,26 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
    /* SD_flags and WF_flags share the first nibble */
    int sd_flag = wake_flags & 0xF;

+#ifdef CONFIG_CACULE_SCHED
+    struct sched_entity *se = &p->se;
+
+    if (!is_interactive(&se->cacule_node))
+        goto cfs_way;
+
+    // first check whether the previous CPU
+    // has no runnable tasks
+    if (cpumask_test_cpu(prev_cpu, p->cpus_ptr) &&
+        cpu_rq(prev_cpu)->cfs.nr_running == 0)
+        return prev_cpu;
+
+    new_cpu = find_least_IS_cpu(p);
+
+    if (new_cpu != -1)
+        return new_cpu;
+
+    new_cpu = prev_cpu;
+cfs_way:
+#else
    if (wake_flags & WF_TTWU) {
        record_wakee(p);

@@ -6797,6 +7170,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)

        want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->cpus_ptr);
    }
+#endif /* CONFIG_CACULE_SCHED */

    rcu_read_lock();
    for_each_domain(cpu, tmp) {
@@ -6843,6 +7217,7 @@ static void detach_entity_cfs_rq(struct sched_entity *se);
 */
static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
{
+#if !defined(CONFIG_CACULE_SCHED)
    /*
     * As blocked tasks retain absolute vruntime the migration needs to
     * deal with this by subtracting the old and adding the new
@@ -6868,6 +7243,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)

        se->vruntime -= min_vruntime;
    }
+#endif /* CONFIG_CACULE_SCHED */

    if (p->on_rq == TASK_ON_RQ_MIGRATING) {
        /*
@@ -6913,6 +7289,7 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
}
#endif /* CONFIG_SMP */

+#if !defined(CONFIG_CACULE_SCHED)
static unsigned long wakeup_gran(struct sched_entity *se)
{
    unsigned long gran = sysctl_sched_wakeup_granularity;
@@ -6991,6 +7368,7 @@ static void set_skip_buddy(struct sched_entity *se)
    for_each_sched_entity(se)
        cfs_rq_of(se)->skip = se;
}
+#endif /* CONFIG_CACULE_SCHED */

/*
 * Preempt the current task with a newly woken task if needed:
@@ -6999,9 +7377,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
{
    struct task_struct *curr = rq->curr;
    struct sched_entity *se = &curr->se, *pse = &p->se;
+
+#if !defined(CONFIG_CACULE_SCHED)
    struct cfs_rq *cfs_rq = task_cfs_rq(curr);
    int scale = cfs_rq->nr_running >= sched_nr_latency;
    int next_buddy_marked = 0;
+#endif /* CONFIG_CACULE_SCHED */

    if (unlikely(se == pse))
        return;
@@ -7015,10 +7396,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
    if (unlikely(throttled_hierarchy(cfs_rq_of(pse))))
        return;

+#if !defined(CONFIG_CACULE_SCHED)
    if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK)) {
        set_next_buddy(pse);
        next_buddy_marked = 1;
    }
+#endif /* CONFIG_CACULE_SCHED */

    /*
     * We can come here with TIF_NEED_RESCHED already set from new task
@@ -7048,6 +7431,11 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
    find_matching_se(&se, &pse);
    update_curr(cfs_rq_of(se));
    BUG_ON(!pse);
+
+#ifdef CONFIG_CACULE_SCHED
+    if (entity_before(sched_clock(), &se->cacule_node, &pse->cacule_node) == 1)
+        goto preempt;
+#else
    if (wakeup_preempt_entity(se, pse) == 1) {
        /*
         * Bias pick_next to pick the sched entity that is
@@ -7057,11 +7445,14 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
        set_next_buddy(pse);
        goto preempt;
    }
+#endif /* CONFIG_CACULE_SCHED */

    return;

preempt:
    resched_curr(rq);
+
+#if !defined(CONFIG_CACULE_SCHED)
    /*
     * Only set the backward buddy when the current task is still
     * on the rq. This can happen when a wakeup gets interleaved
@@ -7076,6 +7467,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_

    if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se))
        set_last_buddy(se);
+#endif /* CONFIG_CACULE_SCHED */
}

struct task_struct *
@@ -7250,7 +7642,10 @@ static void yield_task_fair(struct rq *rq)
{
    struct task_struct *curr = rq->curr;
    struct cfs_rq *cfs_rq = task_cfs_rq(curr);
+
+#if !defined(CONFIG_CACULE_SCHED)
    struct sched_entity *se = &curr->se;
+#endif

    /*
     * Are we the only task in the tree?
@@ -7258,7 +7653,9 @@ static void yield_task_fair(struct rq *rq)
    if (unlikely(rq->nr_running == 1))
        return;

+#if !defined(CONFIG_CACULE_SCHED)
    clear_buddies(cfs_rq, se);
+#endif

    if (curr->policy != SCHED_BATCH) {
        update_rq_clock(rq);
@@ -7274,7 +7671,9 @@ static void yield_task_fair(struct rq *rq)
        rq_clock_skip_update(rq);
    }

+#if !defined(CONFIG_CACULE_SCHED)
    set_skip_buddy(se);
+#endif
}

static bool yield_to_task_fair(struct rq *rq, struct task_struct *p)
@@ -7285,8 +7684,10 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p)
    if (!se->on_rq || throttled_hierarchy(cfs_rq_of(se)))
        return false;

+#if !defined(CONFIG_CACULE_SCHED)
    /* Tell the scheduler that we'd really like pse to run next. */
    set_next_buddy(se);
+#endif

    yield_task_fair(rq);

@@ -7513,6 +7914,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
    if (env->sd->flags & SD_SHARE_CPUCAPACITY)
        return 0;

+#if !defined(CONFIG_CACULE_SCHED)
    /*
     * Buddy candidates are cache hot:
     */
@@ -7520,6 +7922,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
        (&p->se == cfs_rq_of(&p->se)->next ||
         &p->se == cfs_rq_of(&p->se)->last))
        return 1;
+#endif

    if (sysctl_sched_migration_cost == -1)
        return 1;
@@ -10547,9 +10950,11 @@ static void nohz_newidle_balance(struct rq *this_rq)
    if (!housekeeping_cpu(this_cpu, HK_FLAG_SCHED))
        return;

+#if !defined(CONFIG_CACULE_SCHED)
    /* Will wake up very soon. No time for doing anything else*/
    if (this_rq->avg_idle < sysctl_sched_migration_cost)
        return;
+#endif

    /* Don't need to update blocked load of idle CPUs*/
    if (!READ_ONCE(nohz.has_blocked) ||
@@ -10612,7 +11017,10 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
     */
    rq_unpin_lock(this_rq, rf);

-    if (this_rq->avg_idle < sysctl_sched_migration_cost ||
+    if (
+#if !defined(CONFIG_CACULE_SCHED)
+        this_rq->avg_idle < sysctl_sched_migration_cost ||
+#endif
        !READ_ONCE(this_rq->rd->overload)) {

        rcu_read_lock();
@@ -10780,11 +11188,28 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
        update_overutilized_status(task_rq(curr));
}

+#ifdef CONFIG_CACULE_SCHED
/*
 * called on fork with the child task as argument from the parent's context
 *  - child not yet on the tasklist
 *  - preemption disabled
 */
+static void task_fork_fair(struct task_struct *p)
+{
+    struct cfs_rq *cfs_rq;
+    struct rq *rq = this_rq();
+    struct rq_flags rf;
+
+    rq_lock(rq, &rf);
+    update_rq_clock(rq);
+
+    cfs_rq = task_cfs_rq(current);
+    if (cfs_rq->curr)
+        update_curr(cfs_rq);
+
+    rq_unlock(rq, &rf);
+}
+#else
static void task_fork_fair(struct task_struct *p)
{
    struct cfs_rq *cfs_rq;
@@ -10815,6 +11240,7 @@ static void task_fork_fair(struct task_struct *p)
        se->vruntime -= cfs_rq->min_vruntime;
    rq_unlock(rq, &rf);
}
+#endif /* CONFIG_CACULE_SCHED */

/*
 * Priority of the task has changed. Check to see if we preempt
@@ -10933,6 +11359,8 @@ static void attach_entity_cfs_rq(struct sched_entity *se)
static void detach_task_cfs_rq(struct task_struct *p)
{
    struct sched_entity *se = &p->se;
+
+#if !defined(CONFIG_CACULE_SCHED)
    struct cfs_rq *cfs_rq = cfs_rq_of(se);

    if (!vruntime_normalized(p)) {
@@ -10943,6 +11371,7 @@ static void detach_task_cfs_rq(struct task_struct *p)
        place_entity(cfs_rq, se, 0);
        se->vruntime -= cfs_rq->min_vruntime;
    }
+#endif

    detach_entity_cfs_rq(se);
}
@@ -10950,12 +11379,17 @@ static void detach_task_cfs_rq(struct task_struct *p)
static void attach_task_cfs_rq(struct task_struct *p)
{
    struct sched_entity *se = &p->se;
+
+#if !defined(CONFIG_CACULE_SCHED)
    struct cfs_rq *cfs_rq = cfs_rq_of(se);
+#endif

    attach_entity_cfs_rq(se);

+#if !defined(CONFIG_CACULE_SCHED)
    if (!vruntime_normalized(p))
        se->vruntime += cfs_rq->min_vruntime;
+#endif
}

static void switched_from_fair(struct rq *rq, struct task_struct *p)
@@ -11011,13 +11445,22 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first)
void init_cfs_rq(struct cfs_rq *cfs_rq)
{
    cfs_rq->tasks_timeline = RB_ROOT_CACHED;
+
+#if !defined(CONFIG_CACULE_SCHED)
    cfs_rq->min_vruntime = (u64)(-(1LL << 20));
#ifndef CONFIG_64BIT
    cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
#endif
+#endif /* CONFIG_CACULE_SCHED */
+
#ifdef CONFIG_SMP
    raw_spin_lock_init(&cfs_rq->removed.lock);
#endif
+
+#ifdef CONFIG_CACULE_SCHED
+    cfs_rq->head = NULL;
+    cfs_rq->tail = NULL;
+#endif
}

#ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index a189bec13729..0affe3be7c21 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -525,10 +525,13 @@ struct cfs_rq {
    unsigned int idle_h_nr_running; /* SCHED_IDLE */

    u64 exec_clock;
+
+#if !defined(CONFIG_CACULE_SCHED)
    u64 min_vruntime;
#ifndef CONFIG_64BIT
    u64 min_vruntime_copy;
#endif
+#endif /* CONFIG_CACULE_SCHED */

    struct rb_root_cached tasks_timeline;

@@ -537,9 +540,15 @@ struct cfs_rq {
     * It is set to NULL otherwise (i.e when none are currently running).
     */
    struct sched_entity *curr;
+#ifdef CONFIG_CACULE_SCHED
+    struct cacule_node *head;
+    struct cacule_node *tail;
+
+#else
    struct sched_entity *next;
    struct sched_entity *last;
    struct sched_entity *skip;
+#endif // CONFIG_CACULE_SCHED

#ifdef CONFIG_SCHED_DEBUG
    unsigned int nr_spread_over;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index d4a78e08f6d8..e8cdedf74fed 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1736,6 +1736,29 @@ static struct ctl_table kern_table[] = {
        .mode = 0644,
        .proc_handler = proc_dointvec,
    },
+#ifdef CONFIG_CACULE_SCHED
+    {
+        .procname = "sched_interactivity_factor",
+        .data = &interactivity_factor,
+        .maxlen = sizeof(unsigned int),
+        .mode = 0644,
+        .proc_handler = proc_dointvec,
+    },
+    {
+        .procname = "sched_interactivity_threshold",
+        .data = &interactivity_threshold,
+        .maxlen = sizeof(unsigned int),
+        .mode = 0644,
+        .proc_handler = proc_dointvec,
+    },
+    {
+        .procname = "sched_max_lifetime_ms",
+        .data = &cacule_max_lifetime,
+        .maxlen = sizeof(unsigned int),
+        .mode = 0644,
+        .proc_handler = proc_dointvec,
+    },
+#endif
#ifdef CONFIG_SCHEDSTATS
    {
        .procname = "sched_schedstats",