Move to unified builder as default

The previous building scripts and patches will be moved to legacy
This commit is contained in:
Tk-Glitch
2020-10-26 22:46:56 +01:00
parent 10e8a62b99
commit e9fb606121
242 changed files with 3 additions and 444828 deletions

View File

@@ -0,0 +1,156 @@
From 5ec2dd3a095442ec1a21d86042a4994f2ba24e63 Mon Sep 17 00:00:00 2001
Message-Id: <5ec2dd3a095442ec1a21d86042a4994f2ba24e63.1512651251.git.jan.steffens@gmail.com>
From: Serge Hallyn <serge.hallyn@canonical.com>
Date: Fri, 31 May 2013 19:12:12 +0100
Subject: [PATCH] add sysctl to disallow unprivileged CLONE_NEWUSER by default
Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
[bwh: Remove unneeded binary sysctl bits]
Signed-off-by: Daniel Micay <danielmicay@gmail.com>
---
kernel/fork.c | 15 +++++++++++++++
kernel/sysctl.c | 12 ++++++++++++
kernel/user_namespace.c | 3 +++
3 files changed, 30 insertions(+)
diff --git a/kernel/fork.c b/kernel/fork.c
index 07cc743698d3668e..4011d68a8ff9305c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -102,6 +102,11 @@
#define CREATE_TRACE_POINTS
#include <trace/events/task.h>
+#ifdef CONFIG_USER_NS
+extern int unprivileged_userns_clone;
+#else
+#define unprivileged_userns_clone 0
+#endif
/*
* Minimum number of threads to boot the kernel
@@ -1555,6 +1560,10 @@ static __latent_entropy struct task_struct *copy_process(
if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
return ERR_PTR(-EINVAL);
+ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone)
+ if (!capable(CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+
/*
* Thread groups must share signals as well, and detached threads
* can only be started up within the thread group.
@@ -2348,6 +2357,12 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
if (unshare_flags & CLONE_NEWNS)
unshare_flags |= CLONE_FS;
+ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) {
+ err = -EPERM;
+ if (!capable(CAP_SYS_ADMIN))
+ goto bad_unshare_out;
+ }
+
err = check_unshare_flags(unshare_flags);
if (err)
goto bad_unshare_out;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b86520ed3fb60fbf..f7dab3760839f1a1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -105,6 +105,9 @@ extern int core_uses_pid;
extern char core_pattern[];
extern unsigned int core_pipe_limit;
#endif
+#ifdef CONFIG_USER_NS
+extern int unprivileged_userns_clone;
+#endif
extern int pid_max;
extern int pid_max_min, pid_max_max;
extern int percpu_pagelist_fraction;
@@ -513,6 +516,15 @@ static struct ctl_table kern_table[] = {
.proc_handler = proc_dointvec,
},
#endif
+#ifdef CONFIG_USER_NS
+ {
+ .procname = "unprivileged_userns_clone",
+ .data = &unprivileged_userns_clone,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
#ifdef CONFIG_PROC_SYSCTL
{
.procname = "tainted",
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index c490f1e4313b998a..dd03bd39d7bf194d 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -24,6 +24,9 @@
#include <linux/projid.h>
#include <linux/fs_struct.h>
+/* sysctl */
+int unprivileged_userns_clone;
+
static struct kmem_cache *user_ns_cachep __read_mostly;
static DEFINE_MUTEX(userns_state_mutex);
--
2.15.1
From b5202296055dd333db4425120d3f93ef4e6a0573 Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
Date: Thu, 7 Dec 2017 13:50:48 +0100
Subject: ZEN: Add CONFIG for unprivileged_userns_clone
This way our default behavior continues to match the vanilla kernel.
---
init/Kconfig | 16 ++++++++++++++++
kernel/user_namespace.c | 4 ++++
2 files changed, 20 insertions(+)
diff --git a/init/Kconfig b/init/Kconfig
index 4592bf7997c0..f3df02990aff 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1004,6 +1004,22 @@ config USER_NS
If unsure, say N.
+config USER_NS_UNPRIVILEGED
+ bool "Allow unprivileged users to create namespaces"
+ default y
+ depends on USER_NS
+ help
+ When disabled, unprivileged users will not be able to create
+ new namespaces. Allowing users to create their own namespaces
+ has been part of several recent local privilege escalation
+ exploits, so if you need user namespaces but are
+ paranoid^Wsecurity-conscious you want to disable this.
+
+ This setting can be overridden at runtime via the
+ kernel.unprivileged_userns_clone sysctl.
+
+ If unsure, say Y.
+
config PID_NS
bool "PID Namespaces"
default y
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 6b9dbc257e34..107b17f0d528 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -27,7 +27,11 @@
#include <linux/sort.h>
/* sysctl */
+#ifdef CONFIG_USER_NS_UNPRIVILEGED
+int unprivileged_userns_clone = 1;
+#else
int unprivileged_userns_clone;
+#endif
static struct kmem_cache *user_ns_cachep __read_mostly;
static DEFINE_MUTEX(userns_state_mutex);

View File

@@ -0,0 +1,354 @@
From 2ac70785613ef4c6b16414986bb18bd7b60d2a13 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Mon, 14 Mar 2016 11:10:58 -0600
Subject: [PATCH] pci pme wakeups
Reduce wakeups for PME checks, which are a workaround for miswired
boards (sadly, too many of them) in laptops.
---
drivers/pci/pci.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index c25acace7d91..0ddebdad9f5b 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -61,7 +61,7 @@ struct pci_pme_device {
struct pci_dev *dev;
};
-#define PME_TIMEOUT 1000 /* How long between PME checks */
+#define PME_TIMEOUT 4000 /* How long between PME checks */
static void pci_dev_d3_sleep(struct pci_dev *dev)
{
--
2.20.1
From 7e7e36c67aa71d6a1ec5676d99d37c1fea389ceb Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sat, 19 Mar 2016 21:32:19 -0400
Subject: [PATCH] intel_idle: tweak cpuidle cstates
Increase target_residency in cpuidle cstate
Tune intel_idle to be a bit less agressive;
Clear linux is cleaner in hygiene (wakupes) than the average linux,
so we can afford changing these in a way that increases
performance while keeping power efficiency
---
drivers/idle/intel_idle.c | 44 +++++++++++++++++++--------------------
1 file changed, 22 insertions(+), 22 deletions(-)
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 8b5d85c91e9d..5e2d813a048d 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -466,7 +466,7 @@ static struct cpuidle_state hsw_cstates[] = {
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01),
.exit_latency = 10,
- .target_residency = 20,
+ .target_residency = 120,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -474,7 +474,7 @@ static struct cpuidle_state hsw_cstates[] = {
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 33,
- .target_residency = 100,
+ .target_residency = 900,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -482,7 +482,7 @@ static struct cpuidle_state hsw_cstates[] = {
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 133,
- .target_residency = 400,
+ .target_residency = 1000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -490,7 +490,7 @@ static struct cpuidle_state hsw_cstates[] = {
.desc = "MWAIT 0x32",
.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 166,
- .target_residency = 500,
+ .target_residency = 1500,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -498,7 +498,7 @@ static struct cpuidle_state hsw_cstates[] = {
.desc = "MWAIT 0x40",
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 300,
- .target_residency = 900,
+ .target_residency = 2000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -506,7 +506,7 @@ static struct cpuidle_state hsw_cstates[] = {
.desc = "MWAIT 0x50",
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 600,
- .target_residency = 1800,
+ .target_residency = 5000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -514,7 +514,7 @@ static struct cpuidle_state hsw_cstates[] = {
.desc = "MWAIT 0x60",
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 2600,
- .target_residency = 7700,
+ .target_residency = 9000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -534,7 +534,7 @@ static struct cpuidle_state bdw_cstates[] = {
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01),
.exit_latency = 10,
- .target_residency = 20,
+ .target_residency = 120,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -542,7 +542,7 @@ static struct cpuidle_state bdw_cstates[] = {
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 40,
- .target_residency = 100,
+ .target_residency = 1000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -550,7 +550,7 @@ static struct cpuidle_state bdw_cstates[] = {
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 133,
- .target_residency = 400,
+ .target_residency = 1000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -558,7 +558,7 @@ static struct cpuidle_state bdw_cstates[] = {
.desc = "MWAIT 0x32",
.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 166,
- .target_residency = 500,
+ .target_residency = 2000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -566,7 +566,7 @@ static struct cpuidle_state bdw_cstates[] = {
.desc = "MWAIT 0x40",
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 300,
- .target_residency = 900,
+ .target_residency = 4000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -574,7 +574,7 @@ static struct cpuidle_state bdw_cstates[] = {
.desc = "MWAIT 0x50",
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 600,
- .target_residency = 1800,
+ .target_residency = 7000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -582,7 +582,7 @@ static struct cpuidle_state bdw_cstates[] = {
.desc = "MWAIT 0x60",
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 2600,
- .target_residency = 7700,
+ .target_residency = 9000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -603,7 +603,7 @@ static struct cpuidle_state skl_cstates[] = {
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01),
.exit_latency = 10,
- .target_residency = 20,
+ .target_residency = 120,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -611,7 +611,7 @@ static struct cpuidle_state skl_cstates[] = {
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 70,
- .target_residency = 100,
+ .target_residency = 1000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -619,7 +619,7 @@ static struct cpuidle_state skl_cstates[] = {
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 85,
- .target_residency = 200,
+ .target_residency = 600,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -627,7 +627,7 @@ static struct cpuidle_state skl_cstates[] = {
.desc = "MWAIT 0x33",
.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 124,
- .target_residency = 800,
+ .target_residency = 3000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -635,7 +635,7 @@ static struct cpuidle_state skl_cstates[] = {
.desc = "MWAIT 0x40",
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 200,
- .target_residency = 800,
+ .target_residency = 3200,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -643,7 +643,7 @@ static struct cpuidle_state skl_cstates[] = {
.desc = "MWAIT 0x50",
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 480,
- .target_residency = 5000,
+ .target_residency = 9000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -651,7 +651,7 @@ static struct cpuidle_state skl_cstates[] = {
.desc = "MWAIT 0x60",
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 890,
- .target_residency = 5000,
+ .target_residency = 9000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -672,7 +672,7 @@ static struct cpuidle_state skx_cstates[] = {
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01),
.exit_latency = 10,
- .target_residency = 20,
+ .target_residency = 300,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
--
2.20.1
From b8211d4f79dd88dfc2d4bd52be46103ea0b70e3e Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Fri, 6 Jan 2017 15:34:09 +0000
Subject: [PATCH] ipv4/tcp: allow the memory tuning for tcp to go a little
bigger than default
---
net/ipv4/tcp.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index cf3c5095c10e..b30d51837b2d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3897,8 +3897,8 @@ void __init tcp_init(void)
tcp_init_mem();
/* Set per-socket limits to no more than 1/128 the pressure threshold */
limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7);
- max_wshare = min(4UL*1024*1024, limit);
- max_rshare = min(6UL*1024*1024, limit);
+ max_wshare = min(16UL*1024*1024, limit);
+ max_rshare = min(16UL*1024*1024, limit);
init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024;
--
2.20.1
From 050223869257b87e22636158a80da38d877248ed Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sun, 18 Feb 2018 23:35:41 +0000
Subject: [PATCH] locking: rwsem: spin faster
tweak rwsem owner spinning a bit
---
kernel/locking/rwsem.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index eef04551eae7..1ec5ab4c8ff7 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -720,6 +720,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
struct task_struct *new, *owner;
unsigned long flags, new_flags;
enum owner_state state;
+ int i = 0;
owner = rwsem_owner_flags(sem, &flags);
state = rwsem_owner_state(owner, flags, nonspinnable);
@@ -753,7 +754,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
break;
}
- cpu_relax();
+ if (i++ > 1000)
+ cpu_relax();
}
rcu_read_unlock();
From b836ea320114643d4354b43acb6ec8bb06ada487 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Thu, 2 Jun 2016 23:36:32 -0500
Subject: [PATCH] drivers: Initialize ata before graphics
ATA init is the long pole in the boot process, and its asynchronous.
move the graphics init after it so that ata and graphics initialize
in parallel
---
drivers/Makefile | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/drivers/Makefile b/drivers/Makefile
index aaef17cc6512..d08f3a394929 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -58,15 +58,8 @@ obj-y += char/
# iommu/ comes before gpu as gpu are using iommu controllers
obj-y += iommu/
-# gpu/ comes after char for AGP vs DRM startup and after iommu
-obj-y += gpu/
-
obj-$(CONFIG_CONNECTOR) += connector/
-# i810fb and intelfb depend on char/agp/
-obj-$(CONFIG_FB_I810) += video/fbdev/i810/
-obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
-
obj-$(CONFIG_PARPORT) += parport/
obj-$(CONFIG_NVM) += lightnvm/
obj-y += base/ block/ misc/ mfd/ nfc/
@@ -79,6 +72,14 @@ obj-$(CONFIG_IDE) += ide/
obj-y += scsi/
obj-y += nvme/
obj-$(CONFIG_ATA) += ata/
+
+# gpu/ comes after char for AGP vs DRM startup and after iommu
+obj-y += gpu/
+
+# i810fb and intelfb depend on char/agp/
+obj-$(CONFIG_FB_I810) += video/fbdev/i810/
+obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
+
obj-$(CONFIG_TARGET_CORE) += target/
obj-$(CONFIG_MTD) += mtd/
obj-$(CONFIG_SPI) += spi/

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,72 @@
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_250
+ default HZ_500
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -39,6 +39,13 @@ choice
on SMP and NUMA systems and exactly dividing by both PAL and
NTSC frame rates for video and multimedia work.
+ config HZ_500
+ bool "500 HZ"
+ help
+ 500 Hz is a balanced timer frequency. Provides fast interactivity
+ on desktops with great smoothness without increasing CPU power
+ consumption and sacrificing the battery life on laptops.
+
config HZ_1000
bool "1000 HZ"
help
@@ -52,6 +59,7 @@ config HZ
default 100 if HZ_100
default 250 if HZ_250
default 300 if HZ_300
+ default 500 if HZ_500
default 1000 if HZ_1000
config SCHED_HRTICK
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_500
+ default HZ_750
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -46,6 +46,13 @@ choice
on desktops with great smoothness without increasing CPU power
consumption and sacrificing the battery life on laptops.
+ config HZ_750
+ bool "750 HZ"
+ help
+ 750 Hz is a good timer frequency for desktops. Provides fast
+ interactivity with great smoothness without sacrificing too
+ much throughput.
+
config HZ_1000
bool "1000 HZ"
help
@@ -60,6 +67,7 @@ config HZ
default 250 if HZ_250
default 300 if HZ_300
default 500 if HZ_500
+ default 750 if HZ_750
default 1000 if HZ_1000
config SCHED_HRTICK

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,78 @@
From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Wed, 4 Jul 2018 04:30:08 +0200
Subject: glitched - MuQSS
diff --git a/kernel/sched/MuQSS.c b/kernel/sched/MuQSS.c
index 84a1d08d68551..57c3036a68952 100644
--- a/kernel/sched/MuQSS.c
+++ b/kernel/sched/MuQSS.c
@@ -163,7 +167,11 @@ int sched_interactive __read_mostly = 1;
* are allowed to run five seconds as real time tasks. This is the total over
* all online cpus.
*/
+#ifdef CONFIG_ZENIFY
+int sched_iso_cpu __read_mostly = 25;
+#else
int sched_iso_cpu __read_mostly = 70;
+#endif
/*
* sched_yield_type - Choose what sort of yield sched_yield will perform.
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -5,7 +5,7 @@
choice
prompt "Timer frequency"
default HZ_100 if SCHED_MUQSS
- default HZ_250_NODEF if !SCHED_MUQSS
+ default HZ_500_NODEF if !SCHED_MUQSS
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -50,6 +50,20 @@ choice
on SMP and NUMA systems and exactly dividing by both PAL and
NTSC frame rates for video and multimedia work.
+ config HZ_500_NODEF
+ bool "500 HZ"
+ help
+ 500 Hz is a good timer frequency for desktops. Provides fast
+ interactivity with great smoothness without sacrificing too
+ much throughput.
+
+ config HZ_750_NODEF
+ bool "750 HZ"
+ help
+ 750 Hz is a good timer frequency for desktops. Provides fast
+ interactivity with great smoothness without sacrificing too
+ much throughput.
+
config HZ_1000_NODEF
bool "1000 HZ"
help
@@ -63,6 +70,8 @@ config HZ
default 100 if HZ_100
default 250 if HZ_250_NODEF
default 300 if HZ_300_NODEF
+ default 500 if HZ_500_NODEF
+ default 750 if HZ_750_NODEF
default 1000 if HZ_1000_NODEF
config SCHED_HRTICK
diff --git a/Makefile b/Makefile
index d4d36c61940b..4a9dfe471f1f 100644
--- a/Makefile
+++ b/Makefile
@@ -15,7 +15,6 @@ NAME = Kleptomaniac Octopus
CKVERSION = -ck1
CKNAME = MuQSS Powered
-EXTRAVERSION := $(EXTRAVERSION)$(CKVERSION)
# We are using a recursive build, so we need to do a little thinking
# to get the ordering right.

View File

@@ -0,0 +1,18 @@
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 6b423eebfd5d..61e3271675d6 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -21,10 +21,10 @@
#include "cpufreq_ondemand.h"
/* On-demand governor macros */
-#define DEF_FREQUENCY_UP_THRESHOLD (80)
-#define DEF_SAMPLING_DOWN_FACTOR (1)
+#define DEF_FREQUENCY_UP_THRESHOLD (45)
+#define DEF_SAMPLING_DOWN_FACTOR (5)
#define MAX_SAMPLING_DOWN_FACTOR (100000)
-#define MICRO_FREQUENCY_UP_THRESHOLD (95)
+#define MICRO_FREQUENCY_UP_THRESHOLD (45)
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000)
#define MIN_FREQUENCY_UP_THRESHOLD (1)
#define MAX_FREQUENCY_UP_THRESHOLD (100)

View File

@@ -0,0 +1,18 @@
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 6b423eebfd5d..61e3271675d6 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -21,10 +21,10 @@
#include "cpufreq_ondemand.h"
/* On-demand governor macros */
-#define DEF_FREQUENCY_UP_THRESHOLD (63)
-#define DEF_SAMPLING_DOWN_FACTOR (1)
+#define DEF_FREQUENCY_UP_THRESHOLD (55)
+#define DEF_SAMPLING_DOWN_FACTOR (5)
#define MAX_SAMPLING_DOWN_FACTOR (100000)
-#define MICRO_FREQUENCY_UP_THRESHOLD (95)
+#define MICRO_FREQUENCY_UP_THRESHOLD (63)
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000)
#define MIN_FREQUENCY_UP_THRESHOLD (1)
#define MAX_FREQUENCY_UP_THRESHOLD (100)

View File

@@ -0,0 +1,213 @@
From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Wed, 4 Jul 2018 04:30:08 +0200
Subject: glitched - PDS
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_250
+ default HZ_500
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -39,6 +39,13 @@ choice
on SMP and NUMA systems and exactly dividing by both PAL and
NTSC frame rates for video and multimedia work.
+ config HZ_500
+ bool "500 HZ"
+ help
+ 500 Hz is a balanced timer frequency. Provides fast interactivity
+ on desktops with great smoothness without increasing CPU power
+ consumption and sacrificing the battery life on laptops.
+
config HZ_1000
bool "1000 HZ"
help
@@ -52,6 +59,7 @@ config HZ
default 100 if HZ_100
default 250 if HZ_250
default 300 if HZ_300
+ default 500 if HZ_500
default 1000 if HZ_1000
config SCHED_HRTICK
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_500
+ default HZ_750
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -46,6 +46,13 @@ choice
on desktops with great smoothness without increasing CPU power
consumption and sacrificing the battery life on laptops.
+ config HZ_750
+ bool "750 HZ"
+ help
+ 750 Hz is a good timer frequency for desktops. Provides fast
+ interactivity with great smoothness without sacrificing too
+ much throughput.
+
config HZ_1000
bool "1000 HZ"
help
@@ -60,6 +67,7 @@ config HZ
default 250 if HZ_250
default 300 if HZ_300
default 500 if HZ_500
+ default 750 if HZ_750
default 1000 if HZ_1000
config SCHED_HRTICK
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9270a4370d54..30d01e647417 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -159,7 +159,7 @@ struct scan_control {
/*
* From 0 .. 100. Higher means more swappy.
*/
-int vm_swappiness = 60;
+int vm_swappiness = 20;
/*
* The total number of pages which are beyond the high watermark within all
* zones.
diff --git a/kernel/sched/pds.c b/kernel/sched/pds.c
index c2d831b242b6d18a47e0d87a9f5433a7748b52ff..5bc8d7a8f920c21feab69b2706a3328dc8d39f9a 100644
--- a/kernel/sched/pds.c
+++ b/kernel/sched/pds.c
@@ -409,12 +409,11 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
* [L] ->on_rq
* RELEASE (rq->lock)
*
- * If we observe the old CPU in task_rq_lock(), the acquire of
+ * If we observe the old CPU in task_rq_lock, the acquire of
* the old rq->lock will fully serialize against the stores.
*
- * If we observe the new CPU in task_rq_lock(), the address
- * dependency headed by '[L] rq = task_rq()' and the acquire
- * will pair with the WMB to ensure we then also see migrating.
+ * If we observe the new CPU in task_rq_lock, the acquire will
+ * pair with the WMB to ensure we must then also see migrating.
*/
if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
return rq;
@@ -952,9 +953,9 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
smp_wmb();
#ifdef CONFIG_THREAD_INFO_IN_TASK
- WRITE_ONCE(p->cpu, cpu);
+ p->cpu = cpu;
#else
- WRITE_ONCE(task_thread_info(p)->cpu, cpu);
+ task_thread_info(p)->cpu = cpu;
#endif
#endif
}
@@ -1035,7 +1036,7 @@ static void detach_task(struct rq *rq, struct task_struct *p, int target_cpu)
{
lockdep_assert_held(&rq->lock);
- WRITE_ONCE(p->on_rq ,TASK_ON_RQ_MIGRATING);
+ p->on_rq = TASK_ON_RQ_MIGRATING;
if (task_contributes_to_load(p))
rq->nr_uninterruptible++;
dequeue_task(p, rq, 0);
diff --git a/kernel/sched/pds_sched.h b/kernel/sched/pds_sched.h
index 20dcf19ea057627d91be07b4ec20f0827c30084c..24fa90ca63d144cc4f45d82d88407ea70d2d2edf 100644
--- a/kernel/sched/pds_sched.h
+++ b/kernel/sched/pds_sched.h
@@ -56,7 +56,7 @@ static inline int task_on_rq_queued(struct task_struct *p)
static inline int task_on_rq_migrating(struct task_struct *p)
{
- return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING;
+ return p->on_rq == TASK_ON_RQ_MIGRATING;
}
enum {
diff --git a/init/Kconfig b/init/Kconfig
index 11fd9b502d06..e9bc34d3019b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -948,7 +948,6 @@ config CGROUP_DEVICE
config CGROUP_CPUACCT
bool "Simple CPU accounting controller"
- depends on !SCHED_PDS
help
Provides a simple controller for monitoring the
total CPU consumed by the tasks in a cgroup.
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index b23231bae996..cab4e5c5b38e 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -24,13 +24,13 @@ obj-y += fair.o rt.o deadline.o
obj-$(CONFIG_SMP) += cpudeadline.o topology.o stop_task.o
obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
obj-$(CONFIG_SCHED_DEBUG) += debug.o
-obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
endif
obj-y += loadavg.o clock.o cputime.o
obj-y += idle.o
obj-y += wait.o wait_bit.o swait.o completion.o
obj-$(CONFIG_SMP) += cpupri.o pelt.o
obj-$(CONFIG_SCHEDSTATS) += stats.o
+obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
obj-$(CONFIG_CPU_FREQ) += cpufreq.o
obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
obj-$(CONFIG_MEMBARRIER) += membarrier.o
diff --git a/kernel/sched/pds.c b/kernel/sched/pds.c
index 9281ad164..f09a609cf 100644
--- a/kernel/sched/pds.c
+++ b/kernel/sched/pds.c
@@ -81,6 +81,18 @@ enum {
NR_CPU_AFFINITY_CHK_LEVEL
};
+/*
+ * This allows printing both to /proc/sched_debug and
+ * to the console
+ */
+#define SEQ_printf(m, x...) \
+ do { \
+ if (m) \
+ seq_printf(m, x); \
+ else \
+ pr_cont(x); \
+ } while (0)
+
static inline void print_scheduler_version(void)
{
printk(KERN_INFO "pds: PDS-mq CPU Scheduler 0.99o by Alfred Chen.\n");
@@ -6353,7 +6365,10 @@ void ia64_set_curr_task(int cpu, struct task_struct *p)
#ifdef CONFIG_SCHED_DEBUG
void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
struct seq_file *m)
-{}
+{
+ SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns),
+ get_nr_threads(p));
+}
void proc_sched_set_task(struct task_struct *p)
{}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,193 @@
From cdeab384f48dd9c88e2dff2e9ad8d57dca1a1b1c Mon Sep 17 00:00:00 2001
From: Mark Weiman <mark.weiman@markzz.com>
Date: Sun, 12 Aug 2018 11:36:21 -0400
Subject: [PATCH] pci: Enable overrides for missing ACS capabilities
This an updated version of Alex Williamson's patch from:
https://lkml.org/lkml/2013/5/30/513
Original commit message follows:
PCIe ACS (Access Control Services) is the PCIe 2.0+ feature that
allows us to control whether transactions are allowed to be redirected
in various subnodes of a PCIe topology. For instance, if two
endpoints are below a root port or downsteam switch port, the
downstream port may optionally redirect transactions between the
devices, bypassing upstream devices. The same can happen internally
on multifunction devices. The transaction may never be visible to the
upstream devices.
One upstream device that we particularly care about is the IOMMU. If
a redirection occurs in the topology below the IOMMU, then the IOMMU
cannot provide isolation between devices. This is why the PCIe spec
encourages topologies to include ACS support. Without it, we have to
assume peer-to-peer DMA within a hierarchy can bypass IOMMU isolation.
Unfortunately, far too many topologies do not support ACS to make this
a steadfast requirement. Even the latest chipsets from Intel are only
sporadically supporting ACS. We have trouble getting interconnect
vendors to include the PCIe spec required PCIe capability, let alone
suggested features.
Therefore, we need to add some flexibility. The pcie_acs_override=
boot option lets users opt-in specific devices or sets of devices to
assume ACS support. The "downstream" option assumes full ACS support
on root ports and downstream switch ports. The "multifunction"
option assumes the subset of ACS features available on multifunction
endpoints and upstream switch ports are supported. The "id:nnnn:nnnn"
option enables ACS support on devices matching the provided vendor
and device IDs, allowing more strategic ACS overrides. These options
may be combined in any order. A maximum of 16 id specific overrides
are available. It's suggested to use the most limited set of options
necessary to avoid completely disabling ACS across the topology.
Note to hardware vendors, we have facilities to permanently quirk
specific devices which enforce isolation but not provide an ACS
capability. Please contact me to have your devices added and save
your customers the hassle of this boot option.
Signed-off-by: Mark Weiman <mark.weiman@markzz.com>
---
.../admin-guide/kernel-parameters.txt | 9 ++
drivers/pci/quirks.c | 101 ++++++++++++++++++
2 files changed, 110 insertions(+)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index aefd358a5ca3..173b3596fd9e 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3190,6 +3190,15 @@
nomsi [MSI] If the PCI_MSI kernel config parameter is
enabled, this kernel boot option can be used to
disable the use of MSI interrupts system-wide.
+ pcie_acs_override =
+ [PCIE] Override missing PCIe ACS support for:
+ downstream
+ All downstream ports - full ACS capabilities
+ multifunction
+ All multifunction devices - multifunction ACS subset
+ id:nnnn:nnnn
+ Specific device - full ACS capabilities
+ Specified as vid:did (vendor/device ID) in hex
noioapicquirk [APIC] Disable all boot interrupt quirks.
Safety option to keep boot IRQs enabled. This
should never be necessary.
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 4700d24e5d55..8f7a3d7fd9c1 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3372,6 +3372,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev)
dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET;
}
+static bool acs_on_downstream;
+static bool acs_on_multifunction;
+
+#define NUM_ACS_IDS 16
+struct acs_on_id {
+ unsigned short vendor;
+ unsigned short device;
+};
+static struct acs_on_id acs_on_ids[NUM_ACS_IDS];
+static u8 max_acs_id;
+
+static __init int pcie_acs_override_setup(char *p)
+{
+ if (!p)
+ return -EINVAL;
+
+ while (*p) {
+ if (!strncmp(p, "downstream", 10))
+ acs_on_downstream = true;
+ if (!strncmp(p, "multifunction", 13))
+ acs_on_multifunction = true;
+ if (!strncmp(p, "id:", 3)) {
+ char opt[5];
+ int ret;
+ long val;
+
+ if (max_acs_id >= NUM_ACS_IDS - 1) {
+ pr_warn("Out of PCIe ACS override slots (%d)\n",
+ NUM_ACS_IDS);
+ goto next;
+ }
+
+ p += 3;
+ snprintf(opt, 5, "%s", p);
+ ret = kstrtol(opt, 16, &val);
+ if (ret) {
+ pr_warn("PCIe ACS ID parse error %d\n", ret);
+ goto next;
+ }
+ acs_on_ids[max_acs_id].vendor = val;
+
+ p += strcspn(p, ":");
+ if (*p != ':') {
+ pr_warn("PCIe ACS invalid ID\n");
+ goto next;
+ }
+
+ p++;
+ snprintf(opt, 5, "%s", p);
+ ret = kstrtol(opt, 16, &val);
+ if (ret) {
+ pr_warn("PCIe ACS ID parse error %d\n", ret);
+ goto next;
+ }
+ acs_on_ids[max_acs_id].device = val;
+ max_acs_id++;
+ }
+next:
+ p += strcspn(p, ",");
+ if (*p == ',')
+ p++;
+ }
+
+ if (acs_on_downstream || acs_on_multifunction || max_acs_id)
+ pr_warn("Warning: PCIe ACS overrides enabled; This may allow non-IOMMU protected peer-to-peer DMA\n");
+
+ return 0;
+}
+early_param("pcie_acs_override", pcie_acs_override_setup);
+
+static int pcie_acs_overrides(struct pci_dev *dev, u16 acs_flags)
+{
+ int i;
+
+ /* Never override ACS for legacy devices or devices with ACS caps */
+ if (!pci_is_pcie(dev) ||
+ pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS))
+ return -ENOTTY;
+
+ for (i = 0; i < max_acs_id; i++)
+ if (acs_on_ids[i].vendor == dev->vendor &&
+ acs_on_ids[i].device == dev->device)
+ return 1;
+
+ switch (pci_pcie_type(dev)) {
+ case PCI_EXP_TYPE_DOWNSTREAM:
+ case PCI_EXP_TYPE_ROOT_PORT:
+ if (acs_on_downstream)
+ return 1;
+ break;
+ case PCI_EXP_TYPE_ENDPOINT:
+ case PCI_EXP_TYPE_UPSTREAM:
+ case PCI_EXP_TYPE_LEG_END:
+ case PCI_EXP_TYPE_RC_END:
+ if (acs_on_multifunction && dev->multifunction)
+ return 1;
+ }
+
+ return -ENOTTY;
+}
/*
* Some Atheros AR9xxx and QCA988x chips do not behave after a bus reset.
* The device will throw a Link Down error on AER-capable systems and
@@ -4513,6 +4613,7 @@ static const struct pci_dev_acs_enabled {
{ PCI_VENDOR_ID_ZHAOXIN, 0x9083, pci_quirk_mf_endpoint_acs },
/* Zhaoxin Root/Downstream Ports */
{ PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs },
+ { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides },
{ 0 }
};

View File

@@ -0,0 +1,419 @@
split the futex key setup from the queue locking and key reading. This
is useful to support the setup of multiple keys at the same time, like
what is done in futex_requeue() and what will be done for the
FUTEX_WAIT_MULTIPLE command.
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
---
kernel/futex.c | 71 +++++++++++++++++++++++++++++---------------------
1 file changed, 42 insertions(+), 29 deletions(-)
diff --git a/kernel/futex.c b/kernel/futex.c
index 6d50728ef2e7..91f3db335c57 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2631,6 +2631,39 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
__set_current_state(TASK_RUNNING);
}
+static int __futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
+ struct futex_q *q, struct futex_hash_bucket **hb)
+{
+
+ u32 uval;
+ int ret;
+
+retry_private:
+ *hb = queue_lock(q);
+
+ ret = get_futex_value_locked(&uval, uaddr);
+
+ if (ret) {
+ queue_unlock(*hb);
+
+ ret = get_user(uval, uaddr);
+ if (ret)
+ return ret;
+
+ if (!(flags & FLAGS_SHARED))
+ goto retry_private;
+
+ return 1;
+ }
+
+ if (uval != val) {
+ queue_unlock(*hb);
+ ret = -EWOULDBLOCK;
+ }
+
+ return ret;
+}
+
/**
* futex_wait_setup() - Prepare to wait on a futex
* @uaddr: the futex userspace address
@@ -2651,7 +2684,6 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
struct futex_q *q, struct futex_hash_bucket **hb)
{
- u32 uval;
int ret;
/*
@@ -2672,38 +2704,19 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
* absorb a wakeup if *uaddr does not match the desired values
* while the syscall executes.
*/
-retry:
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ);
- if (unlikely(ret != 0))
- return ret;
-
-retry_private:
- *hb = queue_lock(q);
+ do {
+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED,
+ &q->key, FUTEX_READ);
+ if (unlikely(ret != 0))
+ return ret;
- ret = get_futex_value_locked(&uval, uaddr);
+ ret = __futex_wait_setup(uaddr, val, flags, q, hb);
- if (ret) {
- queue_unlock(*hb);
-
- ret = get_user(uval, uaddr);
+ /* Drop key reference if retry or error. */
if (ret)
- goto out;
+ put_futex_key(&q->key);
+ } while (ret > 0);
- if (!(flags & FLAGS_SHARED))
- goto retry_private;
-
- put_futex_key(&q->key);
- goto retry;
- }
-
- if (uval != val) {
- queue_unlock(*hb);
- ret = -EWOULDBLOCK;
- }
-
-out:
- if (ret)
- put_futex_key(&q->key);
return ret;
}
--
2.20.1
This is a new futex operation, called FUTEX_WAIT_MULTIPLE, which allows
a thread to wait on several futexes at the same time, and be awoken by
any of them. In a sense, it implements one of the features that was
supported by pooling on the old FUTEX_FD interface.
My use case for this operation lies in Wine, where we want to implement
a similar interface available in Windows, used mainly for event
handling. The wine folks have an implementation that uses eventfd, but
it suffers from FD exhaustion (I was told they have application that go
to the order of multi-milion FDs), and higher CPU utilization.
In time, we are also proposing modifications to glibc and libpthread to
make this feature available for Linux native multithreaded applications
using libpthread, which can benefit from the behavior of waiting on any
of a group of futexes.
In particular, using futexes in our Wine use case reduced the CPU
utilization by 4% for the game Beat Saber and by 1.5% for the game
Shadow of Tomb Raider, both running over Proton (a wine based solution
for Windows emulation), when compared to the eventfd interface. This
implementation also doesn't rely of file descriptors, so it doesn't risk
overflowing the resource.
Technically, the existing FUTEX_WAIT implementation can be easily
reworked by using do_futex_wait_multiple with a count of one, and I
have a patch showing how it works. I'm not proposing it, since
futex is such a tricky code, that I'd be more confortable to have
FUTEX_WAIT_MULTIPLE running upstream for a couple development cycles,
before considering modifying FUTEX_WAIT.
From an implementation perspective, the futex list is passed as an array
of (pointer,value,bitset) to the kernel, which will enqueue all of them
and sleep if none was already triggered. It returns a hint of which
futex caused the wake up event to userspace, but the hint doesn't
guarantee that is the only futex triggered. Before calling the syscall
again, userspace should traverse the list, trying to re-acquire any of
the other futexes, to prevent an immediate -EWOULDBLOCK return code from
the kernel.
This was tested using three mechanisms:
1) By reimplementing FUTEX_WAIT in terms of FUTEX_WAIT_MULTIPLE and
running the unmodified tools/testing/selftests/futex and a full linux
distro on top of this kernel.
2) By an example code that exercises the FUTEX_WAIT_MULTIPLE path on a
multi-threaded, event-handling setup.
3) By running the Wine fsync implementation and executing multi-threaded
applications, in particular the modern games mentioned above, on top of
this implementation.
Signed-off-by: Zebediah Figura <z.figura12@gmail.com>
Signed-off-by: Steven Noonan <steven@valvesoftware.com>
Signed-off-by: Pierre-Loup A. Griffais <pgriffais@valvesoftware.com>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
---
include/uapi/linux/futex.h | 7 ++
kernel/futex.c | 161 ++++++++++++++++++++++++++++++++++++-
2 files changed, 164 insertions(+), 4 deletions(-)
diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
index a89eb0accd5e..2401c4cf5095 100644
--- a/include/uapi/linux/futex.h
+++ b/include/uapi/linux/futex.h
@@ -21,6 +21,7 @@
#define FUTEX_WAKE_BITSET 10
#define FUTEX_WAIT_REQUEUE_PI 11
#define FUTEX_CMP_REQUEUE_PI 12
+#define FUTEX_WAIT_MULTIPLE 31
#define FUTEX_PRIVATE_FLAG 128
#define FUTEX_CLOCK_REALTIME 256
@@ -150,4 +151,10 @@ struct robust_list_head {
(((op & 0xf) << 28) | ((cmp & 0xf) << 24) \
| ((oparg & 0xfff) << 12) | (cmparg & 0xfff))
+struct futex_wait_block {
+ __u32 __user *uaddr;
+ __u32 val;
+ __u32 bitset;
+};
+
#endif /* _UAPI_LINUX_FUTEX_H */
diff --git a/kernel/futex.c b/kernel/futex.c
index 91f3db335c57..2623e8f152cd 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -183,6 +183,7 @@ static int __read_mostly futex_cmpxchg_enabled;
#endif
#define FLAGS_CLOCKRT 0x02
#define FLAGS_HAS_TIMEOUT 0x04
+#define FLAGS_WAKE_MULTIPLE 0x08
/*
* Priority Inheritance state:
@@ -2720,6 +2721,150 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
return ret;
}
+static int do_futex_wait_multiple(struct futex_wait_block *wb,
+ u32 count, unsigned int flags,
+ ktime_t *abs_time)
+{
+
+ struct hrtimer_sleeper timeout, *to;
+ struct futex_hash_bucket *hb;
+ struct futex_q *qs = NULL;
+ int ret;
+ int i;
+
+ qs = kcalloc(count, sizeof(struct futex_q), GFP_KERNEL);
+ if (!qs)
+ return -ENOMEM;
+
+ to = futex_setup_timer(abs_time, &timeout, flags,
+ current->timer_slack_ns);
+ retry:
+ for (i = 0; i < count; i++) {
+ qs[i].key = FUTEX_KEY_INIT;
+ qs[i].bitset = wb[i].bitset;
+
+ ret = get_futex_key(wb[i].uaddr, flags & FLAGS_SHARED,
+ &qs[i].key, FUTEX_READ);
+ if (unlikely(ret != 0)) {
+ for (--i; i >= 0; i--)
+ put_futex_key(&qs[i].key);
+ goto out;
+ }
+ }
+
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ for (i = 0; i < count; i++) {
+ ret = __futex_wait_setup(wb[i].uaddr, wb[i].val,
+ flags, &qs[i], &hb);
+ if (ret) {
+ /* Drop the failed key directly. keys 0..(i-1)
+ * will be put by unqueue_me.
+ */
+ put_futex_key(&qs[i].key);
+
+ /* Undo the partial work we did. */
+ for (--i; i >= 0; i--)
+ unqueue_me(&qs[i]);
+
+ __set_current_state(TASK_RUNNING);
+ if (ret > 0)
+ goto retry;
+ goto out;
+ }
+
+ /* We can't hold to the bucket lock when dealing with
+ * the next futex. Queue ourselves now so we can unlock
+ * it before moving on.
+ */
+ queue_me(&qs[i], hb);
+ }
+
+ if (to)
+ hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
+
+ /* There is no easy to way to check if we are wake already on
+ * multiple futexes without waking through each one of them. So
+ * just sleep and let the scheduler handle it.
+ */
+ if (!to || to->task)
+ freezable_schedule();
+
+ __set_current_state(TASK_RUNNING);
+
+ ret = -ETIMEDOUT;
+ /* If we were woken (and unqueued), we succeeded. */
+ for (i = 0; i < count; i++)
+ if (!unqueue_me(&qs[i]))
+ ret = i;
+
+ /* Succeed wakeup */
+ if (ret >= 0)
+ goto out;
+
+ /* Woken by triggered timeout */
+ if (to && !to->task)
+ goto out;
+
+ /*
+ * We expect signal_pending(current), but we might be the
+ * victim of a spurious wakeup as well.
+ */
+ if (!signal_pending(current))
+ goto retry;
+
+ ret = -ERESTARTSYS;
+ if (!abs_time)
+ goto out;
+
+ ret = -ERESTART_RESTARTBLOCK;
+ out:
+ if (to) {
+ hrtimer_cancel(&to->timer);
+ destroy_hrtimer_on_stack(&to->timer);
+ }
+
+ kfree(qs);
+ return ret;
+}
+
+static int futex_wait_multiple(u32 __user *uaddr, unsigned int flags,
+ u32 count, ktime_t *abs_time)
+{
+ struct futex_wait_block *wb;
+ struct restart_block *restart;
+ int ret;
+
+ if (!count)
+ return -EINVAL;
+
+ wb = kcalloc(count, sizeof(struct futex_wait_block), GFP_KERNEL);
+ if (!wb)
+ return -ENOMEM;
+
+ if (copy_from_user(wb, uaddr,
+ count * sizeof(struct futex_wait_block))) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ ret = do_futex_wait_multiple(wb, count, flags, abs_time);
+
+ if (ret == -ERESTART_RESTARTBLOCK) {
+ restart = &current->restart_block;
+ restart->fn = futex_wait_restart;
+ restart->futex.uaddr = uaddr;
+ restart->futex.val = count;
+ restart->futex.time = *abs_time;
+ restart->futex.flags = (flags | FLAGS_HAS_TIMEOUT |
+ FLAGS_WAKE_MULTIPLE);
+ }
+
+out:
+ kfree(wb);
+ return ret;
+}
+
static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
ktime_t *abs_time, u32 bitset)
{
@@ -2797,6 +2942,10 @@ static long futex_wait_restart(struct restart_block *restart)
}
restart->fn = do_no_restart_syscall;
+ if (restart->futex.flags & FLAGS_WAKE_MULTIPLE)
+ return (long)futex_wait_multiple(uaddr, restart->futex.flags,
+ restart->futex.val, tp);
+
return (long)futex_wait(uaddr, restart->futex.flags,
restart->futex.val, tp, restart->futex.bitset);
}
@@ -3680,6 +3829,8 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
uaddr2);
case FUTEX_CMP_REQUEUE_PI:
return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
+ case FUTEX_WAIT_MULTIPLE:
+ return futex_wait_multiple(uaddr, flags, val, timeout);
}
return -ENOSYS;
}
@@ -3696,7 +3847,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
cmd == FUTEX_WAIT_BITSET ||
- cmd == FUTEX_WAIT_REQUEUE_PI)) {
+ cmd == FUTEX_WAIT_REQUEUE_PI ||
+ cmd == FUTEX_WAIT_MULTIPLE)) {
if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
return -EFAULT;
if (get_timespec64(&ts, utime))
@@ -3705,7 +3857,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
return -EINVAL;
t = timespec64_to_ktime(ts);
- if (cmd == FUTEX_WAIT)
+ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE)
t = ktime_add_safe(ktime_get(), t);
tp = &t;
}
@@ -3889,14 +4041,15 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
cmd == FUTEX_WAIT_BITSET ||
- cmd == FUTEX_WAIT_REQUEUE_PI)) {
+ cmd == FUTEX_WAIT_REQUEUE_PI ||
+ cmd == FUTEX_WAIT_MULTIPLE)) {
if (get_old_timespec32(&ts, utime))
return -EFAULT;
if (!timespec64_valid(&ts))
return -EINVAL;
t = timespec64_to_ktime(ts);
- if (cmd == FUTEX_WAIT)
+ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE)
t = ktime_add_safe(ktime_get(), t);
tp = &t;
}
--
2.20.1

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,108 @@
From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Wed, 4 Jul 2018 04:30:08 +0200
Subject: glitched - BMQ
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 6b423eebfd5d..61e3271675d6 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -21,10 +21,10 @@
#include "cpufreq_ondemand.h"
/* On-demand governor macros */
-#define DEF_FREQUENCY_UP_THRESHOLD (63)
-#define DEF_SAMPLING_DOWN_FACTOR (1)
+#define DEF_FREQUENCY_UP_THRESHOLD (55)
+#define DEF_SAMPLING_DOWN_FACTOR (5)
#define MAX_SAMPLING_DOWN_FACTOR (100000)
-#define MICRO_FREQUENCY_UP_THRESHOLD (95)
+#define MICRO_FREQUENCY_UP_THRESHOLD (63)
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000)
#define MIN_FREQUENCY_UP_THRESHOLD (1)
#define MAX_FREQUENCY_UP_THRESHOLD (100)
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_250
+ default HZ_500
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -39,6 +39,13 @@ choice
on SMP and NUMA systems and exactly dividing by both PAL and
NTSC frame rates for video and multimedia work.
+ config HZ_500
+ bool "500 HZ"
+ help
+ 500 Hz is a balanced timer frequency. Provides fast interactivity
+ on desktops with great smoothness without increasing CPU power
+ consumption and sacrificing the battery life on laptops.
+
config HZ_1000
bool "1000 HZ"
help
@@ -52,6 +59,7 @@ config HZ
default 100 if HZ_100
default 250 if HZ_250
default 300 if HZ_300
+ default 500 if HZ_500
default 1000 if HZ_1000
config SCHED_HRTICK
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_500
+ default HZ_750
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -46,6 +46,13 @@ choice
on desktops with great smoothness without increasing CPU power
consumption and sacrificing the battery life on laptops.
+ config HZ_750
+ bool "750 HZ"
+ help
+ 750 Hz is a good timer frequency for desktops. Provides fast
+ interactivity with great smoothness without sacrificing too
+ much throughput.
+
config HZ_1000
bool "1000 HZ"
help
@@ -60,6 +67,7 @@ config HZ
default 250 if HZ_250
default 300 if HZ_300
default 500 if HZ_500
+ default 750 if HZ_750
default 1000 if HZ_1000
config SCHED_HRTICK
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9270a4370d54..30d01e647417 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -159,7 +159,7 @@ struct scan_control {
/*
* From 0 .. 100. Higher means more swappy.
*/
-int vm_swappiness = 60;
+int vm_swappiness = 20;
/*
* The total number of pages which are beyond the high watermark within all
* zones.

View File

@@ -0,0 +1,43 @@
From 1e010beda2896bdf3082fb37a3e49f8ce20e04d8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= <joerg@thalheim.io>
Date: Thu, 2 May 2019 05:28:08 +0100
Subject: [PATCH] x86/fpu: Export kernel_fpu_{begin,end}() with
EXPORT_SYMBOL_GPL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
We need these symbols in zfs as the fpu implementation breaks userspace:
https://github.com/zfsonlinux/zfs/issues/9346
Signed-off-by: Jörg Thalheim <joerg@thalheim.io>
---
arch/x86/kernel/fpu/core.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 12c70840980e..352538b3bb5d 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -102,7 +102,7 @@ void kernel_fpu_begin(void)
}
__cpu_invalidate_fpregs_state();
}
-EXPORT_SYMBOL_GPL(kernel_fpu_begin);
+EXPORT_SYMBOL(kernel_fpu_begin);
void kernel_fpu_end(void)
{
@@ -111,7 +111,7 @@ void kernel_fpu_end(void)
this_cpu_write(in_kernel_fpu, false);
preempt_enable();
}
-EXPORT_SYMBOL_GPL(kernel_fpu_end);
+EXPORT_SYMBOL(kernel_fpu_end);
/*
* Save the FPU state (mark it for reload if necessary):
--
2.23.0

File diff suppressed because it is too large Load Diff