diff --git a/PKGBUILD b/PKGBUILD index e3c508c..80513cb 100644 --- a/PKGBUILD +++ b/PKGBUILD @@ -402,6 +402,51 @@ case $_basever in '49262ce4a8089fa70275aad742fc914baa28d9c384f710c9a62f64796d13e104' '7fb1104c167edb79ec8fbdcde97940ed0f806aa978bdd14d0c665a1d76d25c24') ;; + 512) + opt_ver="5.8%2B" + source=("$kernel_site" + #"$patch_site" + "https://raw.githubusercontent.com/graysky2/kernel_gcc_patch/master/enable_additional_cpu_optimizations_for_gcc_v10.1%2B_kernel_v5.8%2B.patch" + 'config.x86_64' # stock Arch config + #'config_hardened.x86_64' # hardened Arch config + 90-cleanup.hook + cleanup + # ARCH Patches + 0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch + # TkG + 0002-clear-patches.patch + 0003-glitched-base.patch + 0003-glitched-cfs.patch + #0004-glitched-ondemand-muqss.patch + #0004-glitched-muqss.patch + #0004-5.12-ck1.patch + #0005-undead-glitched-ondemand-pds.patch + #0005-undead-glitched-pds.patch + #0005-v5.12_undead-pds099o.patch + #0005-glitched-pds.patch + 0006-add-acs-overrides_iommu.patch + 0007-v5.12-fsync.patch + #0007-v5.12-futex2_interface.patch + #0008-5.12-bcachefs.patch + #0009-glitched-ondemand-bmq.patch + #0009-glitched-bmq.patch + #0009-prjc_v5.12-r0.patch + #0012-linux-hardened.patch + 0012-misc-additions.patch + ) + sha256sums=('d57d9b8c595960bc21137c2d6312aca417029cc45a264f500d047ee1270193d5' + 'SKIP' + '5cc3767b4005fc21c53863ceda75f287a9d2be772dbdee861a5cbec9cc62c1bd' + '1e15fc2ef3fa770217ecc63a220e5df2ddbcf3295eb4a021171e7edd4c6cc898' + '66a03c246037451a77b4d448565b1d7e9368270c7d02872fbd0b5d024ed0a997' + 'f6383abef027fd9a430fd33415355e0df492cdc3c90e9938bf2d98f4f63b32e6' + '35a7cde86fb94939c0f25a62b8c47f3de0dbd3c65f876f460b263181b3e92fc0' + '1ac97da07e72ec7e2b0923d32daacacfaa632a44c714d6942d9f143fe239e1b5' + '7058e57fd68367b029adc77f2a82928f1433daaf02c8c279cb2d13556c8804d7' + '19661ec0d39f9663452b34433214c755179894528bf73a42f6ba52ccf572832a' + 'b302ba6c5bbe8ed19b20207505d513208fae1e678cf4d8e7ac0b154e5fe3f456' + 
'7fb1104c167edb79ec8fbdcde97940ed0f806aa978bdd14d0c665a1d76d25c24') + ;; esac export KBUILD_BUILD_HOST=archlinux diff --git a/install.sh b/install.sh index 2f745e8..0735d1c 100755 --- a/install.sh +++ b/install.sh @@ -225,6 +225,9 @@ if [ "$1" = "install" ] || [ "$1" = "config" ]; then "511") opt_ver="5.8" ;; + "512") + opt_ver="5.8" + ;; esac wget "https://raw.githubusercontent.com/graysky2/kernel_gcc_patch/master/enable_additional_cpu_optimizations_for_gcc_v10.1+_kernel_v${opt_ver}+.patch" diff --git a/linux-tkg-config/prepare b/linux-tkg-config/prepare index 5941e3c..85c5e6c 100644 --- a/linux-tkg-config/prepare +++ b/linux-tkg-config/prepare @@ -6,6 +6,7 @@ ver58=18 ver59=16 ver510=19 ver511=2 +ver512=rc1 _tkg_initscript() { @@ -27,7 +28,7 @@ _tkg_initscript() { # to the rest of the script if [ -z "$_version" ] && [ ! -e "$_path"/versel ]; then plain "Which kernel version do you want to install?" - read -rp "`echo $' 1. 5.4 LTS\n 2. 5.7\n 3. 5.8\n 4. 5.9\n 5. 5.10 LTS\n > 6. 5.11\nchoice[1-6?]'`" _VERSEL; + read -rp "`echo $' 1. 5.4 LTS\n 2. 5.7\n 3. 5.8\n 4. 5.9\n 5. 5.10 LTS\n > 6. 5.11\n 7. 5.12 RC\nchoice[1-7?]'`" _VERSEL; case $_VERSEL in "1") echo "_basever=54" > "$_path"/versel @@ -54,6 +55,11 @@ _tkg_initscript() { echo "_basekernel=5.10" >> "$_path"/versel echo "_sub=${ver510}" >> "$_path"/versel ;; + "7") + echo "_basever=512" > "$_path"/versel + echo "_basekernel=5.12" >> "$_path"/versel + echo "_sub=${ver512}" >> "$_path"/versel + ;; *) echo "_basever=511" > "$_path"/versel echo "_basekernel=5.11" >> "$_path"/versel @@ -92,6 +98,11 @@ _tkg_initscript() { echo "_basekernel=5.11" >> "$_path"/versel echo "_sub=${ver511}" >> "$_path"/versel ;; + "5.12") + echo "_basever=512" > "$_path"/versel + echo "_basekernel=5.12" >> "$_path"/versel + echo "_sub=${ver512}" >> "$_path"/versel + ;; *) error "There is something wrong with your kernel version selection, exiting..." 
exit 1 diff --git a/linux-tkg-patches/5.12/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch b/linux-tkg-patches/5.12/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch new file mode 100644 index 0000000..83240cb --- /dev/null +++ b/linux-tkg-patches/5.12/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch @@ -0,0 +1,156 @@ +From 5ec2dd3a095442ec1a21d86042a4994f2ba24e63 Mon Sep 17 00:00:00 2001 +Message-Id: <5ec2dd3a095442ec1a21d86042a4994f2ba24e63.1512651251.git.jan.steffens@gmail.com> +From: Serge Hallyn +Date: Fri, 31 May 2013 19:12:12 +0100 +Subject: [PATCH] add sysctl to disallow unprivileged CLONE_NEWUSER by default + +Signed-off-by: Serge Hallyn +[bwh: Remove unneeded binary sysctl bits] +Signed-off-by: Daniel Micay +--- + kernel/fork.c | 15 +++++++++++++++ + kernel/sysctl.c | 12 ++++++++++++ + kernel/user_namespace.c | 3 +++ + 3 files changed, 30 insertions(+) + +diff --git a/kernel/fork.c b/kernel/fork.c +index 07cc743698d3668e..4011d68a8ff9305c 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -102,6 +102,11 @@ + + #define CREATE_TRACE_POINTS + #include <trace/events/task.h> ++#ifdef CONFIG_USER_NS ++extern int unprivileged_userns_clone; ++#else ++#define unprivileged_userns_clone 0 ++#endif + + /* + * Minimum number of threads to boot the kernel +@@ -1555,6 +1560,10 @@ static __latent_entropy struct task_struct *copy_process( + if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) + return ERR_PTR(-EINVAL); + ++ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) ++ if (!capable(CAP_SYS_ADMIN)) ++ return ERR_PTR(-EPERM); ++ + /* + * Thread groups must share signals as well, and detached threads + * can only be started up within the thread group. 
+@@ -2348,6 +2357,12 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) + if (unshare_flags & CLONE_NEWNS) + unshare_flags |= CLONE_FS; + ++ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) { ++ err = -EPERM; ++ if (!capable(CAP_SYS_ADMIN)) ++ goto bad_unshare_out; ++ } ++ + err = check_unshare_flags(unshare_flags); + if (err) + goto bad_unshare_out; +diff --git a/kernel/sysctl.c b/kernel/sysctl.c +index b86520ed3fb60fbf..f7dab3760839f1a1 100644 +--- a/kernel/sysctl.c ++++ b/kernel/sysctl.c +@@ -105,6 +105,9 @@ extern int core_uses_pid; + + #if defined(CONFIG_SYSCTL) + ++#ifdef CONFIG_USER_NS ++extern int unprivileged_userns_clone; ++#endif + /* Constants used for minimum and maximum */ + #ifdef CONFIG_LOCKUP_DETECTOR + static int sixty = 60; +@@ -513,6 +516,15 @@ static struct ctl_table kern_table[] = { + .proc_handler = proc_dointvec, + }, + #endif ++#ifdef CONFIG_USER_NS ++ { ++ .procname = "unprivileged_userns_clone", ++ .data = &unprivileged_userns_clone, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec, ++ }, ++#endif + #ifdef CONFIG_PROC_SYSCTL + { + .procname = "tainted", +diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c +index c490f1e4313b998a..dd03bd39d7bf194d 100644 +--- a/kernel/user_namespace.c ++++ b/kernel/user_namespace.c +@@ -24,6 +24,9 @@ + #include + #include + ++/* sysctl */ ++int unprivileged_userns_clone; ++ + static struct kmem_cache *user_ns_cachep __read_mostly; + static DEFINE_MUTEX(userns_state_mutex); + +-- +2.15.1 + +From b5202296055dd333db4425120d3f93ef4e6a0573 Mon Sep 17 00:00:00 2001 +From: "Jan Alexander Steffens (heftig)" +Date: Thu, 7 Dec 2017 13:50:48 +0100 +Subject: ZEN: Add CONFIG for unprivileged_userns_clone + +This way our default behavior continues to match the vanilla kernel. 
+--- + init/Kconfig | 16 ++++++++++++++++ + kernel/user_namespace.c | 4 ++++ + 2 files changed, 20 insertions(+) + +diff --git a/init/Kconfig b/init/Kconfig +index 4592bf7997c0..f3df02990aff 100644 +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -1004,6 +1004,22 @@ config USER_NS + + If unsure, say N. + ++config USER_NS_UNPRIVILEGED ++ bool "Allow unprivileged users to create namespaces" ++ default y ++ depends on USER_NS ++ help ++ When disabled, unprivileged users will not be able to create ++ new namespaces. Allowing users to create their own namespaces ++ has been part of several recent local privilege escalation ++ exploits, so if you need user namespaces but are ++ paranoid^Wsecurity-conscious you want to disable this. ++ ++ This setting can be overridden at runtime via the ++ kernel.unprivileged_userns_clone sysctl. ++ ++ If unsure, say Y. ++ + config PID_NS + bool "PID Namespaces" + default y +diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c +index 6b9dbc257e34..107b17f0d528 100644 +--- a/kernel/user_namespace.c ++++ b/kernel/user_namespace.c +@@ -27,7 +27,11 @@ + #include + + /* sysctl */ ++#ifdef CONFIG_USER_NS_UNPRIVILEGED ++int unprivileged_userns_clone = 1; ++#else + int unprivileged_userns_clone; ++#endif + + static struct kmem_cache *user_ns_cachep __read_mostly; + static DEFINE_MUTEX(userns_state_mutex); diff --git a/linux-tkg-patches/5.12/0002-clear-patches.patch b/linux-tkg-patches/5.12/0002-clear-patches.patch new file mode 100644 index 0000000..22a32f5 --- /dev/null +++ b/linux-tkg-patches/5.12/0002-clear-patches.patch @@ -0,0 +1,360 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Arjan van de Ven +Date: Mon, 14 Mar 2016 11:10:58 -0600 +Subject: [PATCH] pci pme wakeups + +Reduce wakeups for PME checks, which are a workaround for miswired +boards (sadly, too many of them) in laptops. 
+--- + drivers/pci/pci.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c +index c9338f9..6974fbf 100644 +--- a/drivers/pci/pci.c ++++ b/drivers/pci/pci.c +@@ -62,7 +62,7 @@ struct pci_pme_device { + struct pci_dev *dev; + }; + +-#define PME_TIMEOUT 1000 /* How long between PME checks */ ++#define PME_TIMEOUT 4000 /* How long between PME checks */ + + static void pci_dev_d3_sleep(struct pci_dev *dev) + { +-- +https://clearlinux.org + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Arjan van de Ven +Date: Sat, 19 Mar 2016 21:32:19 -0400 +Subject: [PATCH] intel_idle: tweak cpuidle cstates + +Increase target_residency in cpuidle cstate + +Tune intel_idle to be a bit less agressive; +Clear linux is cleaner in hygiene (wakupes) than the average linux, +so we can afford changing these in a way that increases +performance while keeping power efficiency +--- + drivers/idle/intel_idle.c | 44 +++++++++++++++++++-------------------- + 1 file changed, 22 insertions(+), 22 deletions(-) + +diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c +index f449584..c994d24 100644 +--- a/drivers/idle/intel_idle.c ++++ b/drivers/idle/intel_idle.c +@@ -531,7 +531,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 10, +- .target_residency = 20, ++ .target_residency = 120, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { +@@ -539,7 +539,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { + .desc = "MWAIT 0x10", + .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 33, +- .target_residency = 100, ++ .target_residency = 900, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { +@@ -547,7 +547,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | 
CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 133, +- .target_residency = 400, ++ .target_residency = 1000, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { +@@ -555,7 +555,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { + .desc = "MWAIT 0x32", + .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 166, +- .target_residency = 500, ++ .target_residency = 1500, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { +@@ -563,7 +563,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { + .desc = "MWAIT 0x40", + .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 300, +- .target_residency = 900, ++ .target_residency = 2000, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { +@@ -571,7 +571,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { + .desc = "MWAIT 0x50", + .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 600, +- .target_residency = 1800, ++ .target_residency = 5000, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { +@@ -579,7 +579,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { + .desc = "MWAIT 0x60", + .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 2600, +- .target_residency = 7700, ++ .target_residency = 9000, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { +@@ -599,7 +599,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 10, +- .target_residency = 20, ++ .target_residency = 120, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { +@@ -607,7 +607,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { + .desc = "MWAIT 0x10", + .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 40, +- .target_residency = 100, ++ .target_residency = 1000, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, 
}, + { +@@ -615,7 +615,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 133, +- .target_residency = 400, ++ .target_residency = 1000, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { +@@ -623,7 +623,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { + .desc = "MWAIT 0x32", + .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 166, +- .target_residency = 500, ++ .target_residency = 2000, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { +@@ -631,7 +631,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { + .desc = "MWAIT 0x40", + .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 300, +- .target_residency = 900, ++ .target_residency = 4000, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { +@@ -639,7 +639,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { + .desc = "MWAIT 0x50", + .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 600, +- .target_residency = 1800, ++ .target_residency = 7000, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { +@@ -647,7 +647,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { + .desc = "MWAIT 0x60", + .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 2600, +- .target_residency = 7700, ++ .target_residency = 9000, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { +@@ -668,7 +668,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 10, +- .target_residency = 20, ++ .target_residency = 120, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { +@@ -676,7 +676,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { + .desc = "MWAIT 0x10", + .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, + 
.exit_latency = 70, +- .target_residency = 100, ++ .target_residency = 1000, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { +@@ -684,7 +684,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 85, +- .target_residency = 200, ++ .target_residency = 600, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { +@@ -692,7 +692,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { + .desc = "MWAIT 0x33", + .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 124, +- .target_residency = 800, ++ .target_residency = 3000, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { +@@ -700,7 +700,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { + .desc = "MWAIT 0x40", + .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 200, +- .target_residency = 800, ++ .target_residency = 3200, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { +@@ -708,7 +708,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { + .desc = "MWAIT 0x50", + .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 480, +- .target_residency = 5000, ++ .target_residency = 9000, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { +@@ -716,7 +716,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { + .desc = "MWAIT 0x60", + .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 890, +- .target_residency = 5000, ++ .target_residency = 9000, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { +@@ -737,7 +737,7 @@ static struct cpuidle_state skx_cstates[] __initdata = { + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 10, +- .target_residency = 20, ++ .target_residency = 300, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { +-- 
+https://clearlinux.org + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Arjan van de Ven +Date: Fri, 6 Jan 2017 15:34:09 +0000 +Subject: [PATCH] ipv4/tcp: allow the memory tuning for tcp to go a little + bigger than default + +--- + net/ipv4/tcp.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 30c1142..4345075 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -4201,8 +4201,8 @@ void __init tcp_init(void) + tcp_init_mem(); + /* Set per-socket limits to no more than 1/128 the pressure threshold */ + limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); +- max_wshare = min(4UL*1024*1024, limit); +- max_rshare = min(6UL*1024*1024, limit); ++ max_wshare = min(16UL*1024*1024, limit); ++ max_rshare = min(16UL*1024*1024, limit); + + init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; + init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024; +-- +https://clearlinux.org + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Arjan van de Ven +Date: Sun, 18 Feb 2018 23:35:41 +0000 +Subject: [PATCH] locking: rwsem: spin faster + +tweak rwsem owner spinning a bit +--- + kernel/locking/rwsem.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c +index f11b9bd..1bbfcc1 100644 +--- a/kernel/locking/rwsem.c ++++ b/kernel/locking/rwsem.c +@@ -717,6 +717,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable) + struct task_struct *new, *owner; + unsigned long flags, new_flags; + enum owner_state state; ++ int i = 0; + + owner = rwsem_owner_flags(sem, &flags); + state = rwsem_owner_state(owner, flags, nonspinnable); +@@ -750,7 +751,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable) + break; + } + +- cpu_relax(); ++ if (i++ > 1000) ++ cpu_relax(); + } + rcu_read_unlock(); + +-- +https://clearlinux.org + +From 0000000000000000000000000000000000000000 Mon 
Sep 17 00:00:00 2001 +From: Arjan van de Ven +Date: Thu, 2 Jun 2016 23:36:32 -0500 +Subject: [PATCH] initialize ata before graphics + +ATA init is the long pole in the boot process, and its asynchronous. +move the graphics init after it so that ata and graphics initialize +in parallel +--- + drivers/Makefile | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +diff --git a/drivers/Makefile b/drivers/Makefile +index c0cd1b9..af1e2fb 100644 +--- a/drivers/Makefile ++++ b/drivers/Makefile +@@ -59,15 +59,8 @@ obj-y += char/ + # iommu/ comes before gpu as gpu are using iommu controllers + obj-y += iommu/ + +-# gpu/ comes after char for AGP vs DRM startup and after iommu +-obj-y += gpu/ +- + obj-$(CONFIG_CONNECTOR) += connector/ + +-# i810fb and intelfb depend on char/agp/ +-obj-$(CONFIG_FB_I810) += video/fbdev/i810/ +-obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ +- + obj-$(CONFIG_PARPORT) += parport/ + obj-$(CONFIG_NVM) += lightnvm/ + obj-y += base/ block/ misc/ mfd/ nfc/ +@@ -80,6 +73,14 @@ obj-$(CONFIG_IDE) += ide/ + obj-y += scsi/ + obj-y += nvme/ + obj-$(CONFIG_ATA) += ata/ ++ ++# gpu/ comes after char for AGP vs DRM startup and after iommu ++obj-y += gpu/ ++ ++# i810fb and intelfb depend on char/agp/ ++obj-$(CONFIG_FB_I810) += video/fbdev/i810/ ++obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ ++ + obj-$(CONFIG_TARGET_CORE) += target/ + obj-$(CONFIG_MTD) += mtd/ + obj-$(CONFIG_SPI) += spi/ +-- +https://clearlinux.org + diff --git a/linux-tkg-patches/5.12/0003-glitched-base.patch b/linux-tkg-patches/5.12/0003-glitched-base.patch new file mode 100644 index 0000000..d0bb7d3 --- /dev/null +++ b/linux-tkg-patches/5.12/0003-glitched-base.patch @@ -0,0 +1,678 @@ +From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 +From: Tk-Glitch +Date: Wed, 4 Jul 2018 04:30:08 +0200 +Subject: [PATCH 01/17] glitched + +--- + scripts/mkcompile_h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/scripts/mkcompile_h 
b/scripts/mkcompile_h +index baf3ab8d9d49..854e32e6aec7 100755 +--- a/scripts/mkcompile_h ++++ b/scripts/mkcompile_h +@@ -41,8 +41,8 @@ else + fi + + UTS_VERSION="#$VERSION" +-CONFIG_FLAGS="" +-if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi ++CONFIG_FLAGS="TKG" ++if [ -n "$SMP" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS SMP"; fi + if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi + if [ -n "$PREEMPT_RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT_RT"; fi + +-- +2.28.0 + + +From c304f43d14e98d4bf1215fc10bc5012f554bdd8a Mon Sep 17 00:00:00 2001 +From: Alexandre Frade +Date: Mon, 29 Jan 2018 16:59:22 +0000 +Subject: [PATCH 02/17] dcache: cache_pressure = 50 decreases the rate at which + VFS caches are reclaimed + +Signed-off-by: Alexandre Frade +--- + fs/dcache.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/dcache.c b/fs/dcache.c +index 361ea7ab30ea..0c5cf69b241a 100644 +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -71,7 +71,7 @@ + * If no ancestor relationship: + * arbitrary, since it's serialized on rename_lock + */ +-int sysctl_vfs_cache_pressure __read_mostly = 100; ++int sysctl_vfs_cache_pressure __read_mostly = 50; + EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); + + __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); +-- +2.28.0 + + +From 28f32f59d9d55ac7ec3a20b79bdd02d2a0a5f7e1 Mon Sep 17 00:00:00 2001 +From: Alexandre Frade +Date: Mon, 29 Jan 2018 18:29:13 +0000 +Subject: [PATCH 03/17] sched/core: nr_migrate = 128 increases number of tasks + to iterate in a single balance run. + +Signed-off-by: Alexandre Frade +--- + kernel/sched/core.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index f788cd61df21..2bfbb4213707 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -59,7 +59,7 @@ const_debug unsigned int sysctl_sched_features = + * Number of tasks to iterate in a single balance run. 
+ * Limited because this is done with IRQs disabled. + */ +-const_debug unsigned int sysctl_sched_nr_migrate = 32; ++const_debug unsigned int sysctl_sched_nr_migrate = 128; + + /* + * period over which we measure -rt task CPU usage in us. +@@ -71,9 +71,9 @@ __read_mostly int scheduler_running; + + /* + * part of the period that we allow rt tasks to run in us. +- * default: 0.95s ++ * XanMod default: 0.98s + */ +-int sysctl_sched_rt_runtime = 950000; ++int sysctl_sched_rt_runtime = 980000; + + /* + * __task_rq_lock - lock the rq @p resides on. +-- +2.28.0 + + +From acc49f33a10f61dc66c423888cbb883ba46710e4 Mon Sep 17 00:00:00 2001 +From: Alexandre Frade +Date: Mon, 29 Jan 2018 17:41:29 +0000 +Subject: [PATCH 04/17] scripts: disable the localversion "+" tag of a git repo + +Signed-off-by: Alexandre Frade +--- + scripts/setlocalversion | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/scripts/setlocalversion b/scripts/setlocalversion +index 20f2efd57b11..0552d8b9f582 100755 +--- a/scripts/setlocalversion ++++ b/scripts/setlocalversion +@@ -54,7 +54,7 @@ scm_version() + # If only the short version is requested, don't bother + # running further git commands + if $short; then +- echo "+" ++ # echo "+" + return + fi + # If we are past a tagged commit (like +-- +2.28.0 + + +From 61fcb33fb0de8bc0f060e0a1ada38ed149217f4d Mon Sep 17 00:00:00 2001 +From: Oleksandr Natalenko +Date: Wed, 11 Dec 2019 11:46:19 +0100 +Subject: [PATCH 05/17] init/Kconfig: enable -O3 for all arches + +Building a kernel with -O3 may help in hunting bugs like [1] and thus +using this switch should not be restricted to one specific arch only. + +With that, lets expose it for everyone. 
+ +[1] https://lore.kernel.org/lkml/673b885183fb64f1cbb3ed2387524077@natalenko.name/ + +Signed-off-by: Oleksandr Natalenko +--- + init/Kconfig | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/init/Kconfig b/init/Kconfig +index 0498af567f70..3ae8678e1145 100644 +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -1278,7 +1278,6 @@ config CC_OPTIMIZE_FOR_PERFORMANCE + + config CC_OPTIMIZE_FOR_PERFORMANCE_O3 + bool "Optimize more for performance (-O3)" +- depends on ARC + help + Choosing this option will pass "-O3" to your compiler to optimize + the kernel yet more for performance. +-- +2.28.0 + + +From 360c6833e07cc9fdef5746f6bc45bdbc7212288d Mon Sep 17 00:00:00 2001 +From: "Jan Alexander Steffens (heftig)" +Date: Fri, 26 Oct 2018 11:22:33 +0100 +Subject: [PATCH 06/17] infiniband: Fix __read_overflow2 error with -O3 + inlining + +--- + drivers/infiniband/core/addr.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c +index 3a98439bba83..6efc4f907f58 100644 +--- a/drivers/infiniband/core/addr.c ++++ b/drivers/infiniband/core/addr.c +@@ -820,6 +820,7 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, + union { + struct sockaddr_in _sockaddr_in; + struct sockaddr_in6 _sockaddr_in6; ++ struct sockaddr_ib _sockaddr_ib; + } sgid_addr, dgid_addr; + int ret; + +-- +2.28.0 + + +From f85ed068b4d0e6c31edce8574a95757a60e58b87 Mon Sep 17 00:00:00 2001 +From: Etienne Juvigny +Date: Mon, 3 Sep 2018 17:36:25 +0200 +Subject: [PATCH 07/17] Zenify & stuff + +--- + init/Kconfig | 32 ++++++++++++++++++++++++++++++++ + kernel/sched/fair.c | 25 +++++++++++++++++++++++++ + mm/page-writeback.c | 8 ++++++++ + 3 files changed, 65 insertions(+) + +diff --git a/init/Kconfig b/init/Kconfig +index 3ae8678e1145..da708eed0f1e 100644 +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -92,6 +92,38 @@ config THREAD_INFO_IN_TASK + + menu "General setup" + ++config ZENIFY ++ bool "A selection of patches from Zen/Liquorix kernel and 
additional tweaks for a better gaming experience" ++ default y ++ help ++ Tunes the kernel for responsiveness at the cost of throughput and power usage. ++ ++ --- Virtual Memory Subsystem --------------------------- ++ ++ Mem dirty before bg writeback..: 10 % -> 20 % ++ Mem dirty before sync writeback: 20 % -> 50 % ++ ++ --- Block Layer ---------------------------------------- ++ ++ Queue depth...............: 128 -> 512 ++ Default MQ scheduler......: mq-deadline -> bfq ++ ++ --- CFS CPU Scheduler ---------------------------------- ++ ++ Scheduling latency.............: 6 -> 3 ms ++ Minimal granularity............: 0.75 -> 0.3 ms ++ Wakeup granularity.............: 1 -> 0.5 ms ++ CPU migration cost.............: 0.5 -> 0.25 ms ++ Bandwidth slice size...........: 5 -> 3 ms ++ Ondemand fine upscaling limit..: 95 % -> 85 % ++ ++ --- MuQSS CPU Scheduler -------------------------------- ++ ++ Scheduling interval............: 6 -> 3 ms ++ ISO task max realtime use......: 70 % -> 25 % ++ Ondemand coarse upscaling limit: 80 % -> 45 % ++ Ondemand fine upscaling limit..: 95 % -> 45 % ++ + config BROKEN + bool + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 6b3b59cc51d6..2a0072192c3d 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -37,8 +37,13 @@ + * + * (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds) + */ ++#ifdef CONFIG_ZENIFY ++unsigned int sysctl_sched_latency = 3000000ULL; ++static unsigned int normalized_sysctl_sched_latency = 3000000ULL; ++#else + unsigned int sysctl_sched_latency = 6000000ULL; + static unsigned int normalized_sysctl_sched_latency = 6000000ULL; ++#endif + + /* + * The initial- and re-scaling of tunables is configurable +@@ -58,13 +63,22 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_L + * + * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) + */ ++#ifdef CONFIG_ZENIFY ++unsigned int sysctl_sched_min_granularity = 300000ULL; ++static unsigned int 
normalized_sysctl_sched_min_granularity = 300000ULL; ++#else + unsigned int sysctl_sched_min_granularity = 750000ULL; + static unsigned int normalized_sysctl_sched_min_granularity = 750000ULL; ++#endif + + /* + * This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity + */ ++#ifdef CONFIG_ZENIFY ++static unsigned int sched_nr_latency = 10; ++#else + static unsigned int sched_nr_latency = 8; ++#endif + + /* + * After fork, child runs first. If set to 0 (default) then +@@ -81,10 +95,17 @@ unsigned int sysctl_sched_child_runs_first __read_mostly; + * + * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) + */ ++#ifdef CONFIG_ZENIFY ++unsigned int sysctl_sched_wakeup_granularity = 500000UL; ++static unsigned int normalized_sysctl_sched_wakeup_granularity = 500000UL; ++ ++const_debug unsigned int sysctl_sched_migration_cost = 50000UL; ++#else + unsigned int sysctl_sched_wakeup_granularity = 1000000UL; + static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; + + const_debug unsigned int sysctl_sched_migration_cost = 500000UL; ++#endif + + int sched_thermal_decay_shift; + static int __init setup_sched_thermal_decay_shift(char *str) +@@ -128,8 +149,12 @@ int __weak arch_asym_cpu_priority(int cpu) + * + * (default: 5 msec, units: microseconds) + */ ++#ifdef CONFIG_ZENIFY ++unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL; ++#else + unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; + #endif ++#endif + + static inline void update_load_add(struct load_weight *lw, unsigned long inc) + { +diff --git a/mm/page-writeback.c b/mm/page-writeback.c +index 28b3e7a67565..01a1aef2b9b1 100644 +--- a/mm/page-writeback.c ++++ b/mm/page-writeback.c +@@ -71,7 +71,11 @@ static long ratelimit_pages = 32; + /* + * Start background writeback (via writeback threads) at this percentage + */ ++#ifdef CONFIG_ZENIFY ++int dirty_background_ratio = 20; ++#else + int dirty_background_ratio = 10; ++#endif + + /* + * dirty_background_bytes starts at 0 
(disabled) so that it is a function of +@@ -88,7 +92,11 @@ int vm_highmem_is_dirtyable; + /* + * The generator of dirty data starts writeback at this percentage + */ ++#ifdef CONFIG_ZENIFY ++int vm_dirty_ratio = 50; ++#else + int vm_dirty_ratio = 20; ++#endif + + /* + * vm_dirty_bytes starts at 0 (disabled) so that it is a function of +-- +2.28.0 + + +From e92e67143385cf285851e12aa8b7f083dd38dd24 Mon Sep 17 00:00:00 2001 +From: Steven Barrett +Date: Sun, 16 Jan 2011 18:57:32 -0600 +Subject: [PATCH 08/17] ZEN: Allow TCP YeAH as default congestion control + +4.4: In my tests YeAH dramatically slowed down transfers over a WLAN, + reducing throughput from ~65Mbps (CUBIC) to ~7MBps (YeAH) over 10 + seconds (netperf TCP_STREAM) including long stalls. + + Be careful when choosing this. ~heftig +--- + net/ipv4/Kconfig | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig +index e64e59b536d3..bfb55ef7ebbe 100644 +--- a/net/ipv4/Kconfig ++++ b/net/ipv4/Kconfig +@@ -691,6 +691,9 @@ choice + config DEFAULT_VEGAS + bool "Vegas" if TCP_CONG_VEGAS=y + ++ config DEFAULT_YEAH ++ bool "YeAH" if TCP_CONG_YEAH=y ++ + config DEFAULT_VENO + bool "Veno" if TCP_CONG_VENO=y + +@@ -724,6 +727,7 @@ config DEFAULT_TCP_CONG + default "htcp" if DEFAULT_HTCP + default "hybla" if DEFAULT_HYBLA + default "vegas" if DEFAULT_VEGAS ++ default "yeah" if DEFAULT_YEAH + default "westwood" if DEFAULT_WESTWOOD + default "veno" if DEFAULT_VENO + default "reno" if DEFAULT_RENO +-- +2.28.0 + + +From 76dbe7477bfde1b5e8bf29a71b5af7ab2be9b98e Mon Sep 17 00:00:00 2001 +From: Steven Barrett +Date: Wed, 28 Nov 2018 19:01:27 -0600 +Subject: [PATCH 09/17] zen: Use [defer+madvise] as default khugepaged defrag + strategy + +For some reason, the default strategy to respond to THP fault fallbacks +is still just madvise, meaning stall if the program wants transparent +hugepages, but don't trigger a background reclaim / compaction if THP +begins to fail allocations. 
This creates a snowball affect where we +still use the THP code paths, but we almost always fail once a system +has been active and busy for a while. + +The option "defer" was created for interactive systems where THP can +still improve performance. If we have to fallback to a regular page due +to an allocation failure or anything else, we will trigger a background +reclaim and compaction so future THP attempts succeed and previous +attempts eventually have their smaller pages combined without stalling +running applications. + +We still want madvise to stall applications that explicitely want THP, +so defer+madvise _does_ make a ton of sense. Make it the default for +interactive systems, especially if the kernel maintainer left +transparent hugepages on "always". + +Reasoning and details in the original patch: https://lwn.net/Articles/711248/ +--- + mm/huge_memory.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/mm/huge_memory.c b/mm/huge_memory.c +index 74300e337c3c..9277f22c10a7 100644 +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -53,7 +53,11 @@ unsigned long transparent_hugepage_flags __read_mostly = + #ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE + (1< +Date: Wed, 24 Oct 2018 16:58:52 -0300 +Subject: [PATCH 10/17] net/sched: allow configuring cake qdisc as default + +Signed-off-by: Alexandre Frade +--- + net/sched/Kconfig | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/net/sched/Kconfig b/net/sched/Kconfig +index 84badf00647e..6a922bca9f39 100644 +--- a/net/sched/Kconfig ++++ b/net/sched/Kconfig +@@ -471,6 +471,9 @@ choice + config DEFAULT_SFQ + bool "Stochastic Fair Queue" if NET_SCH_SFQ + ++ config DEFAULT_CAKE ++ bool "Common Applications Kept Enhanced" if NET_SCH_CAKE ++ + config DEFAULT_PFIFO_FAST + bool "Priority FIFO Fast" + endchoice +@@ -481,6 +484,7 @@ config DEFAULT_NET_SCH + default "fq" if DEFAULT_FQ + default "fq_codel" if DEFAULT_FQ_CODEL + default "sfq" if DEFAULT_SFQ ++ default "cake" if DEFAULT_CAKE + default 
"pfifo_fast" + endif + +-- +2.28.0 + + +From 816ee502759e954304693813bd03d94986b28dba Mon Sep 17 00:00:00 2001 +From: Tk-Glitch +Date: Mon, 18 Feb 2019 17:40:57 +0100 +Subject: [PATCH 11/17] mm: Set watermark_scale_factor to 200 (from 10) + +Multiple users have reported it's helping reducing/eliminating stuttering +with DXVK. +--- + mm/page_alloc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 898ff44f2c7b..e72074034793 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -330,7 +330,7 @@ int watermark_boost_factor __read_mostly; + #else + int watermark_boost_factor __read_mostly = 15000; + #endif +-int watermark_scale_factor = 10; ++int watermark_scale_factor = 200; + + static unsigned long nr_kernel_pages __initdata; + static unsigned long nr_all_pages __initdata; +-- +2.28.0 + + +From 90240bcd90a568878738e66c0d45bed3e38e347b Mon Sep 17 00:00:00 2001 +From: Tk-Glitch +Date: Fri, 19 Apr 2019 12:33:38 +0200 +Subject: [PATCH 12/17] Set vm.max_map_count to 262144 by default + +The value is still pretty low, and AMD64-ABI and ELF extended numbering +supports that, so we should be fine on modern x86 systems. + +This fixes crashes in some applications using more than 65535 vmas (also +affects some windows games running in wine, such as Star Citizen). +--- + include/linux/mm.h | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/include/linux/mm.h b/include/linux/mm.h +index bc05c3588aa3..b0cefe94920d 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -190,8 +190,7 @@ static inline void __mm_zero_struct_page(struct page *page) + * not a hard limit any more. Although some userspace tools can be surprised by + * that. 
+ */ +-#define MAPCOUNT_ELF_CORE_MARGIN (5) +-#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) ++#define DEFAULT_MAX_MAP_COUNT (262144) + + extern int sysctl_max_map_count; + +-- +2.28.0 + + +From 3a34034dba5efe91bcec491efe8c66e8087f509b Mon Sep 17 00:00:00 2001 +From: Tk-Glitch +Date: Mon, 27 Jul 2020 00:19:18 +0200 +Subject: [PATCH 13/17] mm: bump DEFAULT_MAX_MAP_COUNT + +Some games such as Detroit: Become Human tend to be very crash prone with +lower values. +--- + include/linux/mm.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/linux/mm.h b/include/linux/mm.h +index b0cefe94920d..890165099b07 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -190,7 +190,7 @@ static inline void __mm_zero_struct_page(struct page *page) + * not a hard limit any more. Although some userspace tools can be surprised by + * that. + */ +-#define DEFAULT_MAX_MAP_COUNT (262144) ++#define DEFAULT_MAX_MAP_COUNT (524288) + + extern int sysctl_max_map_count; + +-- +2.28.0 + + +From 977812938da7c7226415778c340832141d9278b7 Mon Sep 17 00:00:00 2001 +From: Alexandre Frade +Date: Mon, 25 Nov 2019 15:13:06 -0300 +Subject: [PATCH 14/17] elevator: set default scheduler to bfq for blk-mq + +Signed-off-by: Alexandre Frade +--- + block/elevator.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/block/elevator.c b/block/elevator.c +index 4eab3d70e880..79669aa39d79 100644 +--- a/block/elevator.c ++++ b/block/elevator.c +@@ -623,15 +623,15 @@ static inline bool elv_support_iosched(struct request_queue *q) + } + + /* +- * For single queue devices, default to using mq-deadline. If we have multiple +- * queues or mq-deadline is not available, default to "none". ++ * For single queue devices, default to using bfq. If we have multiple ++ * queues or bfq is not available, default to "none". 
+ */ + static struct elevator_type *elevator_get_default(struct request_queue *q) + { + if (q->nr_hw_queues != 1) + return NULL; + +- return elevator_get(q, "mq-deadline", false); ++ return elevator_get(q, "bfq", false); + } + + /* +-- +2.28.0 + +From 3c229f434aca65c4ca61772bc03c3e0370817b92 Mon Sep 17 00:00:00 2001 +From: Alexandre Frade +Date: Mon, 3 Aug 2020 17:05:04 +0000 +Subject: [PATCH 16/17] mm: set 2 megabytes for address_space-level file + read-ahead pages size + +Signed-off-by: Alexandre Frade +--- + include/linux/pagemap.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h +index cf2468da68e9..007dea784451 100644 +--- a/include/linux/pagemap.h ++++ b/include/linux/pagemap.h +@@ -655,7 +655,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask); + void delete_from_page_cache_batch(struct address_space *mapping, + struct pagevec *pvec); + +-#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE) ++#define VM_READAHEAD_PAGES (SZ_2M / PAGE_SIZE) + + void page_cache_sync_readahead(struct address_space *, struct file_ra_state *, + struct file *, pgoff_t index, unsigned long req_count); +-- +2.28.0 + + +From 716f41cf6631f3a85834dcb67b4ce99185b6387f Mon Sep 17 00:00:00 2001 +From: Steven Barrett +Date: Wed, 15 Jan 2020 20:43:56 -0600 +Subject: [PATCH 17/17] ZEN: intel-pstate: Implement "enable" parameter + +If intel-pstate is compiled into the kernel, it will preempt the loading +of acpi-cpufreq so you can take advantage of hardware p-states without +any friction. + +However, intel-pstate is not completely superior to cpufreq's ondemand +for one reason. There's no concept of an up_threshold property. + +In ondemand, up_threshold essentially reduces the maximum utilization to +compare against, allowing you to hit max frequencies and turbo boost +from a much lower core utilization. 
+ +With intel-pstate, you have the concept of minimum and maximum +performance, but no tunable that lets you define, maximum frequency +means 50% core utilization. For just this oversight, there's reasons +you may want ondemand. + +Lets support setting "enable" in kernel boot parameters. This lets +kernel maintainers include "intel_pstate=disable" statically in the +static boot parameters, but let users of the kernel override this +selection. +--- + Documentation/admin-guide/kernel-parameters.txt | 3 +++ + drivers/cpufreq/intel_pstate.c | 2 ++ + 2 files changed, 5 insertions(+) + +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index fb95fad81c79..3e92fee81e33 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -1857,6 +1857,9 @@ + disable + Do not enable intel_pstate as the default + scaling driver for the supported processors ++ enable ++ Enable intel_pstate in-case "disable" was passed ++ previously in the kernel boot parameters + passive + Use intel_pstate as a scaling driver, but configure it + to work with generic cpufreq governors (instead of +diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c +index 36a469150ff9..aee891c9b78a 100644 +--- a/drivers/cpufreq/intel_pstate.c ++++ b/drivers/cpufreq/intel_pstate.c +@@ -2845,6 +2845,8 @@ static int __init intel_pstate_setup(char *str) + pr_info("HWP disabled\n"); + no_hwp = 1; + } ++ if (!strcmp(str, "enable")) ++ no_load = 0; + if (!strcmp(str, "force")) + force_load = 1; + if (!strcmp(str, "hwp_only")) +-- +2.28.0 + diff --git a/linux-tkg-patches/5.12/0003-glitched-cfs.patch b/linux-tkg-patches/5.12/0003-glitched-cfs.patch new file mode 100644 index 0000000..06b7f02 --- /dev/null +++ b/linux-tkg-patches/5.12/0003-glitched-cfs.patch @@ -0,0 +1,72 @@ +diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz +index 2a202a846757..1d9c7ed79b11 100644 +--- 
a/kernel/Kconfig.hz ++++ b/kernel/Kconfig.hz +@@ -4,7 +4,7 @@ + + choice + prompt "Timer frequency" +- default HZ_250 ++ default HZ_500 + help + Allows the configuration of the timer frequency. It is customary + to have the timer interrupt run at 1000 Hz but 100 Hz may be more +@@ -39,6 +39,13 @@ choice + on SMP and NUMA systems and exactly dividing by both PAL and + NTSC frame rates for video and multimedia work. + ++ config HZ_500 ++ bool "500 HZ" ++ help ++ 500 Hz is a balanced timer frequency. Provides fast interactivity ++ on desktops with great smoothness without increasing CPU power ++ consumption and sacrificing the battery life on laptops. ++ + config HZ_1000 + bool "1000 HZ" + help +@@ -52,6 +59,7 @@ config HZ + default 100 if HZ_100 + default 250 if HZ_250 + default 300 if HZ_300 ++ default 500 if HZ_500 + default 1000 if HZ_1000 + + config SCHED_HRTICK + +diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz +index 2a202a846757..1d9c7ed79b11 100644 +--- a/kernel/Kconfig.hz ++++ b/kernel/Kconfig.hz +@@ -4,7 +4,7 @@ + + choice + prompt "Timer frequency" +- default HZ_500 ++ default HZ_750 + help + Allows the configuration of the timer frequency. It is customary + to have the timer interrupt run at 1000 Hz but 100 Hz may be more +@@ -46,6 +46,13 @@ choice + on desktops with great smoothness without increasing CPU power + consumption and sacrificing the battery life on laptops. + ++ config HZ_750 ++ bool "750 HZ" ++ help ++ 750 Hz is a good timer frequency for desktops. Provides fast ++ interactivity with great smoothness without sacrificing too ++ much throughput. 
++ + config HZ_1000 + bool "1000 HZ" + help +@@ -60,6 +67,7 @@ config HZ + default 250 if HZ_250 + default 300 if HZ_300 + default 500 if HZ_500 ++ default 750 if HZ_750 + default 1000 if HZ_1000 + + config SCHED_HRTICK + diff --git a/linux-tkg-patches/5.12/0006-add-acs-overrides_iommu.patch b/linux-tkg-patches/5.12/0006-add-acs-overrides_iommu.patch new file mode 100644 index 0000000..d1303a5 --- /dev/null +++ b/linux-tkg-patches/5.12/0006-add-acs-overrides_iommu.patch @@ -0,0 +1,193 @@ +From cdeab384f48dd9c88e2dff2e9ad8d57dca1a1b1c Mon Sep 17 00:00:00 2001 +From: Mark Weiman +Date: Sun, 12 Aug 2018 11:36:21 -0400 +Subject: [PATCH] pci: Enable overrides for missing ACS capabilities + +This an updated version of Alex Williamson's patch from: +https://lkml.org/lkml/2013/5/30/513 + +Original commit message follows: + +PCIe ACS (Access Control Services) is the PCIe 2.0+ feature that +allows us to control whether transactions are allowed to be redirected +in various subnodes of a PCIe topology. For instance, if two +endpoints are below a root port or downsteam switch port, the +downstream port may optionally redirect transactions between the +devices, bypassing upstream devices. The same can happen internally +on multifunction devices. The transaction may never be visible to the +upstream devices. + +One upstream device that we particularly care about is the IOMMU. If +a redirection occurs in the topology below the IOMMU, then the IOMMU +cannot provide isolation between devices. This is why the PCIe spec +encourages topologies to include ACS support. Without it, we have to +assume peer-to-peer DMA within a hierarchy can bypass IOMMU isolation. + +Unfortunately, far too many topologies do not support ACS to make this +a steadfast requirement. Even the latest chipsets from Intel are only +sporadically supporting ACS. We have trouble getting interconnect +vendors to include the PCIe spec required PCIe capability, let alone +suggested features. 
+ +Therefore, we need to add some flexibility. The pcie_acs_override= +boot option lets users opt-in specific devices or sets of devices to +assume ACS support. The "downstream" option assumes full ACS support +on root ports and downstream switch ports. The "multifunction" +option assumes the subset of ACS features available on multifunction +endpoints and upstream switch ports are supported. The "id:nnnn:nnnn" +option enables ACS support on devices matching the provided vendor +and device IDs, allowing more strategic ACS overrides. These options +may be combined in any order. A maximum of 16 id specific overrides +are available. It's suggested to use the most limited set of options +necessary to avoid completely disabling ACS across the topology. +Note to hardware vendors, we have facilities to permanently quirk +specific devices which enforce isolation but not provide an ACS +capability. Please contact me to have your devices added and save +your customers the hassle of this boot option. + +Signed-off-by: Mark Weiman +--- + .../admin-guide/kernel-parameters.txt | 9 ++ + drivers/pci/quirks.c | 101 ++++++++++++++++++ + 2 files changed, 110 insertions(+) + +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index aefd358a5ca3..173b3596fd9e 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -3190,6 +3190,15 @@ + nomsi [MSI] If the PCI_MSI kernel config parameter is + enabled, this kernel boot option can be used to + disable the use of MSI interrupts system-wide. ++ pcie_acs_override = ++ [PCIE] Override missing PCIe ACS support for: ++ downstream ++ All downstream ports - full ACS capabilities ++ multifunction ++ All multifunction devices - multifunction ACS subset ++ id:nnnn:nnnn ++ Specific device - full ACS capabilities ++ Specified as vid:did (vendor/device ID) in hex + noioapicquirk [APIC] Disable all boot interrupt quirks. 
+ Safety option to keep boot IRQs enabled. This + should never be necessary. +diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c +index 4700d24e5d55..8f7a3d7fd9c1 100644 +--- a/drivers/pci/quirks.c ++++ b/drivers/pci/quirks.c +@@ -3372,6 +3372,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev) + dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET; + } + ++static bool acs_on_downstream; ++static bool acs_on_multifunction; ++ ++#define NUM_ACS_IDS 16 ++struct acs_on_id { ++ unsigned short vendor; ++ unsigned short device; ++}; ++static struct acs_on_id acs_on_ids[NUM_ACS_IDS]; ++static u8 max_acs_id; ++ ++static __init int pcie_acs_override_setup(char *p) ++{ ++ if (!p) ++ return -EINVAL; ++ ++ while (*p) { ++ if (!strncmp(p, "downstream", 10)) ++ acs_on_downstream = true; ++ if (!strncmp(p, "multifunction", 13)) ++ acs_on_multifunction = true; ++ if (!strncmp(p, "id:", 3)) { ++ char opt[5]; ++ int ret; ++ long val; ++ ++ if (max_acs_id >= NUM_ACS_IDS - 1) { ++ pr_warn("Out of PCIe ACS override slots (%d)\n", ++ NUM_ACS_IDS); ++ goto next; ++ } ++ ++ p += 3; ++ snprintf(opt, 5, "%s", p); ++ ret = kstrtol(opt, 16, &val); ++ if (ret) { ++ pr_warn("PCIe ACS ID parse error %d\n", ret); ++ goto next; ++ } ++ acs_on_ids[max_acs_id].vendor = val; ++ ++ p += strcspn(p, ":"); ++ if (*p != ':') { ++ pr_warn("PCIe ACS invalid ID\n"); ++ goto next; ++ } ++ ++ p++; ++ snprintf(opt, 5, "%s", p); ++ ret = kstrtol(opt, 16, &val); ++ if (ret) { ++ pr_warn("PCIe ACS ID parse error %d\n", ret); ++ goto next; ++ } ++ acs_on_ids[max_acs_id].device = val; ++ max_acs_id++; ++ } ++next: ++ p += strcspn(p, ","); ++ if (*p == ',') ++ p++; ++ } ++ ++ if (acs_on_downstream || acs_on_multifunction || max_acs_id) ++ pr_warn("Warning: PCIe ACS overrides enabled; This may allow non-IOMMU protected peer-to-peer DMA\n"); ++ ++ return 0; ++} ++early_param("pcie_acs_override", pcie_acs_override_setup); ++ ++static int pcie_acs_overrides(struct pci_dev *dev, u16 acs_flags) ++{ ++ int i; ++ ++ /* 
Never override ACS for legacy devices or devices with ACS caps */ ++ if (!pci_is_pcie(dev) || ++ pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS)) ++ return -ENOTTY; ++ ++ for (i = 0; i < max_acs_id; i++) ++ if (acs_on_ids[i].vendor == dev->vendor && ++ acs_on_ids[i].device == dev->device) ++ return 1; ++ ++ switch (pci_pcie_type(dev)) { ++ case PCI_EXP_TYPE_DOWNSTREAM: ++ case PCI_EXP_TYPE_ROOT_PORT: ++ if (acs_on_downstream) ++ return 1; ++ break; ++ case PCI_EXP_TYPE_ENDPOINT: ++ case PCI_EXP_TYPE_UPSTREAM: ++ case PCI_EXP_TYPE_LEG_END: ++ case PCI_EXP_TYPE_RC_END: ++ if (acs_on_multifunction && dev->multifunction) ++ return 1; ++ } ++ ++ return -ENOTTY; ++} + /* + * Some Atheros AR9xxx and QCA988x chips do not behave after a bus reset. + * The device will throw a Link Down error on AER-capable systems and +@@ -4513,6 +4613,7 @@ static const struct pci_dev_acs_enabled { + { PCI_VENDOR_ID_ZHAOXIN, 0x9083, pci_quirk_mf_endpoint_acs }, + /* Zhaoxin Root/Downstream Ports */ + { PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs }, ++ { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides }, + { 0 } + }; + + diff --git a/linux-tkg-patches/5.12/0007-v5.12-fsync.patch b/linux-tkg-patches/5.12/0007-v5.12-fsync.patch new file mode 100644 index 0000000..47badbb --- /dev/null +++ b/linux-tkg-patches/5.12/0007-v5.12-fsync.patch @@ -0,0 +1,597 @@ +From 7b5df0248ce255ef5b7204d65a7b3783ebb76a3d Mon Sep 17 00:00:00 2001 +From: Gabriel Krisman Bertazi +Date: Fri, 13 Dec 2019 11:08:02 -0300 +Subject: [PATCH 1/2] futex: Implement mechanism to wait on any of several + futexes +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This is a new futex operation, called FUTEX_WAIT_MULTIPLE, which allows +a thread to wait on several futexes at the same time, and be awoken by +any of them. In a sense, it implements one of the features that was +supported by pooling on the old FUTEX_FD interface. 
+ +The use case lies in the Wine implementation of the Windows NT interface +WaitMultipleObjects. This Windows API function allows a thread to sleep +waiting on the first of a set of event sources (mutexes, timers, signal, +console input, etc) to signal. Considering this is a primitive +synchronization operation for Windows applications, being able to quickly +signal events on the producer side, and quickly go to sleep on the +consumer side is essential for good performance of those running over Wine. + +Wine developers have an implementation that uses eventfd, but it suffers +from FD exhaustion (there is applications that go to the order of +multi-milion FDs), and higher CPU utilization than this new operation. + +The futex list is passed as an array of `struct futex_wait_block` +(pointer, value, bitset) to the kernel, which will enqueue all of them +and sleep if none was already triggered. It returns a hint of which +futex caused the wake up event to userspace, but the hint doesn't +guarantee that is the only futex triggered. Before calling the syscall +again, userspace should traverse the list, trying to re-acquire any of +the other futexes, to prevent an immediate -EWOULDBLOCK return code from +the kernel. + +This was tested using three mechanisms: + +1) By reimplementing FUTEX_WAIT in terms of FUTEX_WAIT_MULTIPLE and +running the unmodified tools/testing/selftests/futex and a full linux +distro on top of this kernel. + +2) By an example code that exercises the FUTEX_WAIT_MULTIPLE path on a +multi-threaded, event-handling setup. + +3) By running the Wine fsync implementation and executing multi-threaded +applications, in particular modern games, on top of this implementation. + +Changes were tested for the following ABIs: x86_64, i386 and x32. 
+Support for x32 applications is not implemented since it would +take a major rework adding a new entry point and splitting the current +futex 64 entry point in two and we can't change the current x32 syscall +number without breaking user space compatibility. + +CC: Steven Rostedt +Cc: Richard Yao +Cc: Thomas Gleixner +Cc: Peter Zijlstra +Co-developed-by: Zebediah Figura +Signed-off-by: Zebediah Figura +Co-developed-by: Steven Noonan +Signed-off-by: Steven Noonan +Co-developed-by: Pierre-Loup A. Griffais +Signed-off-by: Pierre-Loup A. Griffais +Signed-off-by: Gabriel Krisman Bertazi +[Added compatibility code] +Co-developed-by: André Almeida +Signed-off-by: André Almeida + +Adjusted for v5.9: Removed `put_futex_key` calls. +--- + include/uapi/linux/futex.h | 20 +++ + kernel/futex.c | 352 ++++++++++++++++++++++++++++++++++++- + 2 files changed, 370 insertions(+), 2 deletions(-) + +diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h +index a89eb0accd5e2..580001e89c6ca 100644 +--- a/include/uapi/linux/futex.h ++++ b/include/uapi/linux/futex.h +@@ -21,6 +21,7 @@ + #define FUTEX_WAKE_BITSET 10 + #define FUTEX_WAIT_REQUEUE_PI 11 + #define FUTEX_CMP_REQUEUE_PI 12 ++#define FUTEX_WAIT_MULTIPLE 13 + + #define FUTEX_PRIVATE_FLAG 128 + #define FUTEX_CLOCK_REALTIME 256 +@@ -40,6 +41,8 @@ + FUTEX_PRIVATE_FLAG) + #define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) ++#define FUTEX_WAIT_MULTIPLE_PRIVATE (FUTEX_WAIT_MULTIPLE | \ ++ FUTEX_PRIVATE_FLAG) + + /* + * Support for robust futexes: the kernel cleans up held futexes at +@@ -150,4 +153,21 @@ struct robust_list_head { + (((op & 0xf) << 28) | ((cmp & 0xf) << 24) \ + | ((oparg & 0xfff) << 12) | (cmparg & 0xfff)) + ++/* ++ * Maximum number of multiple futexes to wait for ++ */ ++#define FUTEX_MULTIPLE_MAX_COUNT 128 ++ ++/** ++ * struct futex_wait_block - Block of futexes to be waited for ++ * @uaddr: User address of the futex ++ * @val: Futex value expected by userspace ++ * 
@bitset: Bitset for the optional bitmasked wakeup ++ */ ++struct futex_wait_block { ++ __u32 __user *uaddr; ++ __u32 val; ++ __u32 bitset; ++}; ++ + #endif /* _UAPI_LINUX_FUTEX_H */ +diff --git a/kernel/futex.c b/kernel/futex.c +index a5876694a60eb..6f4bea76df460 100644 +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -197,6 +197,8 @@ struct futex_pi_state { + * @rt_waiter: rt_waiter storage for use with requeue_pi + * @requeue_pi_key: the requeue_pi target futex key + * @bitset: bitset for the optional bitmasked wakeup ++ * @uaddr: userspace address of futex ++ * @uval: expected futex's value + * + * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so + * we can wake only the relevant ones (hashed queues may be shared). +@@ -219,6 +221,8 @@ struct futex_q { + struct rt_mutex_waiter *rt_waiter; + union futex_key *requeue_pi_key; + u32 bitset; ++ u32 __user *uaddr; ++ u32 uval; + } __randomize_layout; + + static const struct futex_q futex_q_init = { +@@ -2304,6 +2308,29 @@ static int unqueue_me(struct futex_q *q) + return ret; + } + ++/** ++ * unqueue_multiple() - Remove several futexes from their futex_hash_bucket ++ * @q: The list of futexes to unqueue ++ * @count: Number of futexes in the list ++ * ++ * Helper to unqueue a list of futexes. This can't fail. ++ * ++ * Return: ++ * - >=0 - Index of the last futex that was awoken; ++ * - -1 - If no futex was awoken ++ */ ++static int unqueue_multiple(struct futex_q *q, int count) ++{ ++ int ret = -1; ++ int i; ++ ++ for (i = 0; i < count; i++) { ++ if (!unqueue_me(&q[i])) ++ ret = i; ++ } ++ return ret; ++} ++ + /* + * PI futexes can not be requeued and must remove themself from the + * hash bucket. The hash bucket lock (i.e. 
lock_ptr) is held on entry +@@ -2662,6 +2689,205 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, + return ret; + } + ++/** ++ * futex_wait_multiple_setup() - Prepare to wait and enqueue multiple futexes ++ * @qs: The corresponding futex list ++ * @count: The size of the lists ++ * @flags: Futex flags (FLAGS_SHARED, etc.) ++ * @awaken: Index of the last awoken futex ++ * ++ * Prepare multiple futexes in a single step and enqueue them. This may fail if ++ * the futex list is invalid or if any futex was already awoken. On success the ++ * task is ready to interruptible sleep. ++ * ++ * Return: ++ * - 1 - One of the futexes was awaken by another thread ++ * - 0 - Success ++ * - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL ++ */ ++static int futex_wait_multiple_setup(struct futex_q *qs, int count, ++ unsigned int flags, int *awaken) ++{ ++ struct futex_hash_bucket *hb; ++ int ret, i; ++ u32 uval; ++ ++ /* ++ * Enqueuing multiple futexes is tricky, because we need to ++ * enqueue each futex in the list before dealing with the next ++ * one to avoid deadlocking on the hash bucket. But, before ++ * enqueuing, we need to make sure that current->state is ++ * TASK_INTERRUPTIBLE, so we don't absorb any awake events, which ++ * cannot be done before the get_futex_key of the next key, ++ * because it calls get_user_pages, which can sleep. Thus, we ++ * fetch the list of futexes keys in two steps, by first pinning ++ * all the memory keys in the futex key, and only then we read ++ * each key and queue the corresponding futex. 
++ */ ++retry: ++ for (i = 0; i < count; i++) { ++ qs[i].key = FUTEX_KEY_INIT; ++ ret = get_futex_key(qs[i].uaddr, flags & FLAGS_SHARED, ++ &qs[i].key, FUTEX_READ); ++ if (unlikely(ret)) { ++ return ret; ++ } ++ } ++ ++ set_current_state(TASK_INTERRUPTIBLE); ++ ++ for (i = 0; i < count; i++) { ++ struct futex_q *q = &qs[i]; ++ ++ hb = queue_lock(q); ++ ++ ret = get_futex_value_locked(&uval, q->uaddr); ++ if (ret) { ++ /* ++ * We need to try to handle the fault, which ++ * cannot be done without sleep, so we need to ++ * undo all the work already done, to make sure ++ * we don't miss any wake ups. Therefore, clean ++ * up, handle the fault and retry from the ++ * beginning. ++ */ ++ queue_unlock(hb); ++ ++ /* ++ * Keys 0..(i-1) are implicitly put ++ * on unqueue_multiple. ++ */ ++ *awaken = unqueue_multiple(qs, i); ++ ++ __set_current_state(TASK_RUNNING); ++ ++ /* ++ * On a real fault, prioritize the error even if ++ * some other futex was awoken. Userspace gave ++ * us a bad address, -EFAULT them. ++ */ ++ ret = get_user(uval, q->uaddr); ++ if (ret) ++ return ret; ++ ++ /* ++ * Even if the page fault was handled, If ++ * something was already awaken, we can safely ++ * give up and succeed to give a hint for userspace to ++ * acquire the right futex faster. ++ */ ++ if (*awaken >= 0) ++ return 1; ++ ++ goto retry; ++ } ++ ++ if (uval != q->uval) { ++ queue_unlock(hb); ++ ++ /* ++ * If something was already awaken, we can ++ * safely ignore the error and succeed. ++ */ ++ *awaken = unqueue_multiple(qs, i); ++ __set_current_state(TASK_RUNNING); ++ if (*awaken >= 0) ++ return 1; ++ ++ return -EWOULDBLOCK; ++ } ++ ++ /* ++ * The bucket lock can't be held while dealing with the ++ * next futex. Queue each futex at this moment so hb can ++ * be unlocked. 
++ */ ++ queue_me(&qs[i], hb); ++ } ++ return 0; ++} ++ ++/** ++ * futex_wait_multiple() - Prepare to wait on and enqueue several futexes ++ * @qs: The list of futexes to wait on ++ * @op: Operation code from futex's syscall ++ * @count: The number of objects ++ * @abs_time: Timeout before giving up and returning to userspace ++ * ++ * Entry point for the FUTEX_WAIT_MULTIPLE futex operation, this function ++ * sleeps on a group of futexes and returns on the first futex that ++ * triggered, or after the timeout has elapsed. ++ * ++ * Return: ++ * - >=0 - Hint to the futex that was awoken ++ * - <0 - On error ++ */ ++static int futex_wait_multiple(struct futex_q *qs, int op, ++ u32 count, ktime_t *abs_time) ++{ ++ struct hrtimer_sleeper timeout, *to; ++ int ret, flags = 0, hint = 0; ++ unsigned int i; ++ ++ if (!(op & FUTEX_PRIVATE_FLAG)) ++ flags |= FLAGS_SHARED; ++ ++ if (op & FUTEX_CLOCK_REALTIME) ++ flags |= FLAGS_CLOCKRT; ++ ++ to = futex_setup_timer(abs_time, &timeout, flags, 0); ++ while (1) { ++ ret = futex_wait_multiple_setup(qs, count, flags, &hint); ++ if (ret) { ++ if (ret > 0) { ++ /* A futex was awaken during setup */ ++ ret = hint; ++ } ++ break; ++ } ++ ++ if (to) ++ hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); ++ ++ /* ++ * Avoid sleeping if another thread already tried to ++ * wake us. ++ */ ++ for (i = 0; i < count; i++) { ++ if (plist_node_empty(&qs[i].list)) ++ break; ++ } ++ ++ if (i == count && (!to || to->task)) ++ freezable_schedule(); ++ ++ ret = unqueue_multiple(qs, count); ++ ++ __set_current_state(TASK_RUNNING); ++ ++ if (ret >= 0) ++ break; ++ if (to && !to->task) { ++ ret = -ETIMEDOUT; ++ break; ++ } else if (signal_pending(current)) { ++ ret = -ERESTARTSYS; ++ break; ++ } ++ /* ++ * The final case is a spurious wakeup, for ++ * which just retry. 
++ */ ++ } ++ ++ if (to) { ++ hrtimer_cancel(&to->timer); ++ destroy_hrtimer_on_stack(&to->timer); ++ } ++ ++ return ret; ++} ++ + static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, + ktime_t *abs_time, u32 bitset) + { +@@ -3774,6 +4000,43 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, + return -ENOSYS; + } + ++/** ++ * futex_read_wait_block - Read an array of futex_wait_block from userspace ++ * @uaddr: Userspace address of the block ++ * @count: Number of blocks to be read ++ * ++ * This function creates and allocate an array of futex_q (we zero it to ++ * initialize the fields) and then, for each futex_wait_block element from ++ * userspace, fill a futex_q element with proper values. ++ */ ++inline struct futex_q *futex_read_wait_block(u32 __user *uaddr, u32 count) ++{ ++ unsigned int i; ++ struct futex_q *qs; ++ struct futex_wait_block fwb; ++ struct futex_wait_block __user *entry = ++ (struct futex_wait_block __user *)uaddr; ++ ++ if (!count || count > FUTEX_MULTIPLE_MAX_COUNT) ++ return ERR_PTR(-EINVAL); ++ ++ qs = kcalloc(count, sizeof(*qs), GFP_KERNEL); ++ if (!qs) ++ return ERR_PTR(-ENOMEM); ++ ++ for (i = 0; i < count; i++) { ++ if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) { ++ kfree(qs); ++ return ERR_PTR(-EFAULT); ++ } ++ ++ qs[i].uaddr = fwb.uaddr; ++ qs[i].uval = fwb.val; ++ qs[i].bitset = fwb.bitset; ++ } ++ ++ return qs; ++} + + SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, + struct __kernel_timespec __user *, utime, u32 __user *, uaddr2, +@@ -3786,7 +4049,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, + + if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || + cmd == FUTEX_WAIT_BITSET || +- cmd == FUTEX_WAIT_REQUEUE_PI)) { ++ cmd == FUTEX_WAIT_REQUEUE_PI || ++ cmd == FUTEX_WAIT_MULTIPLE)) { + if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG)))) + return -EFAULT; + if (get_timespec64(&ts, utime)) +@@ -3807,6 +4071,25 @@ SYSCALL_DEFINE6(futex, u32 
__user *, uaddr, int, op, u32, val, + cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) + val2 = (u32) (unsigned long) utime; + ++ if (cmd == FUTEX_WAIT_MULTIPLE) { ++ int ret; ++ struct futex_q *qs; ++ ++#ifdef CONFIG_X86_X32 ++ if (unlikely(in_x32_syscall())) ++ return -ENOSYS; ++#endif ++ qs = futex_read_wait_block(uaddr, val); ++ ++ if (IS_ERR(qs)) ++ return PTR_ERR(qs); ++ ++ ret = futex_wait_multiple(qs, op, val, tp); ++ kfree(qs); ++ ++ return ret; ++ } ++ + return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); + } + +@@ -3969,6 +4252,57 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, + #endif /* CONFIG_COMPAT */ + + #ifdef CONFIG_COMPAT_32BIT_TIME ++/** ++ * struct compat_futex_wait_block - Block of futexes to be waited for ++ * @uaddr: User address of the futex (compatible pointer) ++ * @val: Futex value expected by userspace ++ * @bitset: Bitset for the optional bitmasked wakeup ++ */ ++struct compat_futex_wait_block { ++ compat_uptr_t uaddr; ++ __u32 val; ++ __u32 bitset; ++}; ++ ++/** ++ * compat_futex_read_wait_block - Read an array of futex_wait_block from ++ * userspace ++ * @uaddr: Userspace address of the block ++ * @count: Number of blocks to be read ++ * ++ * This function does the same as futex_read_wait_block(), except that it ++ * converts the pointer to the futex from the compat version to the regular one. 
++ */ ++inline struct futex_q *compat_futex_read_wait_block(u32 __user *uaddr, ++ u32 count) ++{ ++ unsigned int i; ++ struct futex_q *qs; ++ struct compat_futex_wait_block fwb; ++ struct compat_futex_wait_block __user *entry = ++ (struct compat_futex_wait_block __user *)uaddr; ++ ++ if (!count || count > FUTEX_MULTIPLE_MAX_COUNT) ++ return ERR_PTR(-EINVAL); ++ ++ qs = kcalloc(count, sizeof(*qs), GFP_KERNEL); ++ if (!qs) ++ return ERR_PTR(-ENOMEM); ++ ++ for (i = 0; i < count; i++) { ++ if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) { ++ kfree(qs); ++ return ERR_PTR(-EFAULT); ++ } ++ ++ qs[i].uaddr = compat_ptr(fwb.uaddr); ++ qs[i].uval = fwb.val; ++ qs[i].bitset = fwb.bitset; ++ } ++ ++ return qs; ++} ++ + SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, + struct old_timespec32 __user *, utime, u32 __user *, uaddr2, + u32, val3) +@@ -3980,7 +4314,8 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, + + if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || + cmd == FUTEX_WAIT_BITSET || +- cmd == FUTEX_WAIT_REQUEUE_PI)) { ++ cmd == FUTEX_WAIT_REQUEUE_PI || ++ cmd == FUTEX_WAIT_MULTIPLE)) { + if (get_old_timespec32(&ts, utime)) + return -EFAULT; + if (!timespec64_valid(&ts)) +@@ -3995,6 +4330,19 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, + cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) + val2 = (int) (unsigned long) utime; + ++ if (cmd == FUTEX_WAIT_MULTIPLE) { ++ int ret; ++ struct futex_q *qs = compat_futex_read_wait_block(uaddr, val); ++ ++ if (IS_ERR(qs)) ++ return PTR_ERR(qs); ++ ++ ret = futex_wait_multiple(qs, op, val, tp); ++ kfree(qs); ++ ++ return ret; ++ } ++ + return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); + } + #endif /* CONFIG_COMPAT_32BIT_TIME */ + +From ccdddb50d330d2ee1a4d2cbfdd27bdd7fb10eec3 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Andr=C3=A9=20Almeida?= +Date: Fri, 7 Feb 2020 23:28:02 -0300 +Subject: [PATCH 2/2] futex: Add Proton compatibility code + 
+--- + include/uapi/linux/futex.h | 2 +- + kernel/futex.c | 5 +++-- + 2 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h +index 580001e89c6ca..a3e760886b8e7 100644 +--- a/include/uapi/linux/futex.h ++++ b/include/uapi/linux/futex.h +@@ -21,7 +21,7 @@ + #define FUTEX_WAKE_BITSET 10 + #define FUTEX_WAIT_REQUEUE_PI 11 + #define FUTEX_CMP_REQUEUE_PI 12 +-#define FUTEX_WAIT_MULTIPLE 13 ++#define FUTEX_WAIT_MULTIPLE 31 + + #define FUTEX_PRIVATE_FLAG 128 + #define FUTEX_CLOCK_REALTIME 256 +diff --git a/kernel/futex.c b/kernel/futex.c +index 6f4bea76df460..03d89fe7b8392 100644 +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -4059,7 +4059,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, + return -EINVAL; + + t = timespec64_to_ktime(ts); +- if (cmd == FUTEX_WAIT) ++ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE) + t = ktime_add_safe(ktime_get(), t); + tp = &t; + } +@@ -4260,6 +4260,7 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, + */ + struct compat_futex_wait_block { + compat_uptr_t uaddr; ++ __u32 pad; + __u32 val; + __u32 bitset; + }; +@@ -4322,7 +4323,7 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, + return -EINVAL; + + t = timespec64_to_ktime(ts); +- if (cmd == FUTEX_WAIT) ++ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE) + t = ktime_add_safe(ktime_get(), t); + tp = &t; + } diff --git a/linux-tkg-patches/5.12/0012-misc-additions.patch b/linux-tkg-patches/5.12/0012-misc-additions.patch new file mode 100644 index 0000000..2e058a6 --- /dev/null +++ b/linux-tkg-patches/5.12/0012-misc-additions.patch @@ -0,0 +1,133 @@ +From e5e77ad2223f662e1615266d8ef39a8db7e65a70 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Felix=20H=C3=A4dicke?= +Date: Thu, 19 Nov 2020 09:22:32 +0100 +Subject: HID: quirks: Add Apple Magic Trackpad 2 to hid_have_special_driver + list +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + 
+The Apple Magic Trackpad 2 is handled by the magicmouse driver. And +there were severe stability issues when both drivers (hid-generic and +hid-magicmouse) were loaded for this device. + +Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=210241 + +Signed-off-by: Felix Hädicke +--- + drivers/hid/hid-quirks.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c +index bf7ecab5d9e5..142e9dae2837 100644 +--- a/drivers/hid/hid-quirks.c ++++ b/drivers/hid/hid-quirks.c +@@ -478,6 +478,8 @@ static const struct hid_device_id hid_have_special_driver[] = { + #if IS_ENABLED(CONFIG_HID_MAGICMOUSE) + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICMOUSE) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD) }, ++ { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) }, + #endif + #if IS_ENABLED(CONFIG_HID_MAYFLASH) + { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_PS3) }, +-- +cgit v1.2.3-1-gf6bb5 + +From e437ac931e89629f952ce9f3f9dfe45ac505cd0d Mon Sep 17 00:00:00 2001 +From: Joshua Ashton +Date: Tue, 5 Jan 2021 19:46:01 +0000 +Subject: [PATCH] drm/amdgpu: don't limit gtt size on apus + +Since commit 24562523688b ("Revert "drm/amd/amdgpu: set gtt size +according to system memory size only""), the GTT size was limited by +3GiB or VRAM size. + +This is problematic on APU systems with a small carveout +(notably, those that ship with dGPUs where this is unconfigurable), +where the carveout size can be as low as 128MiB. + +This makes it so the GTT size heuristic always uses 3/4ths of +the system memory size on APUs (limiting the size by 3GiB/VRAM size +only on devices with dedicated video memory). 
+ +Fixes: 24562523688b ("Revert drm/amd/amdgpu: set gtt size according to +system memory size only") + +Signed-off-by: Joshua Ashton +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 +++-- + drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 12 +++++++++--- + 2 files changed, 12 insertions(+), 5 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +index 72efd579ec5e..a5a41e9272d6 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +@@ -192,8 +192,9 @@ module_param_named(gartsize, amdgpu_gart_size, uint, 0600); + + /** + * DOC: gttsize (int) +- * Restrict the size of GTT domain in MiB for testing. The default is -1 (It's VRAM size if 3GB < VRAM < 3/4 RAM, +- * otherwise 3/4 RAM size). ++ * Restrict the size of GTT domain in MiB for testing. The default is -1 (On APUs this is 3/4th ++ * of the system memory; on dGPUs this is 3GiB or VRAM sized, whichever is bigger, ++ * with an upper bound of 3/4th of system memory. 
+ */ + MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)"); + module_param_named(gttsize, amdgpu_gtt_size, int, 0600); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +index 4d8f19ab1014..294f26f4f310 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +@@ -1865,9 +1865,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) + struct sysinfo si; + + si_meminfo(&si); +- gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), +- adev->gmc.mc_vram_size), +- ((uint64_t)si.totalram * si.mem_unit * 3/4)); ++ gtt_size = (uint64_t)si.totalram * si.mem_unit * 3/4; ++ /* If we have dedicated memory, limit our GTT size to ++ * 3GiB or VRAM size, whichever is bigger ++ */ ++ if (!(adev->flags & AMD_IS_APU)) { ++ gtt_size = min(max(AMDGPU_DEFAULT_GTT_SIZE_MB << 20, ++ adev->gmc.mc_vram_size), ++ gtt_size); ++ } + } + else + gtt_size = (uint64_t)amdgpu_gtt_size << 20; +-- +2.30.0 + +From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 +From: Tk-Glitch +Date: Wed, 3 Feb 2021 11:20:12 +0200 +Subject: Revert "cpufreq: Avoid configuring old governors as default with intel_pstate" + +This is an undesirable behavior for us since our aggressive ondemand performs +better than schedutil for gaming when using intel_pstate in passive mode. +Also it interferes with the option to select the desired default governor we have. 
+ +diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig +index 2c7171e0b0010..85de313ddec29 100644 +--- a/drivers/cpufreq/Kconfig ++++ b/drivers/cpufreq/Kconfig +@@ -71,7 +71,6 @@ config CPU_FREQ_DEFAULT_GOV_USERSPACE + + config CPU_FREQ_DEFAULT_GOV_ONDEMAND + bool "ondemand" +- depends on !(X86_INTEL_PSTATE && SMP) + select CPU_FREQ_GOV_ONDEMAND + select CPU_FREQ_GOV_PERFORMANCE + help +@@ -83,7 +84,6 @@ config CPU_FREQ_DEFAULT_GOV_ONDEMAND + + config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE + bool "conservative" +- depends on !(X86_INTEL_PSTATE && SMP) + select CPU_FREQ_GOV_CONSERVATIVE + select CPU_FREQ_GOV_PERFORMANCE + help