From 2deb19f1abba718b42e8a307c6fab3da8da1aba0 Mon Sep 17 00:00:00 2001 From: Tk-Glitch Date: Mon, 12 Oct 2020 12:04:09 +0200 Subject: [PATCH] linux59-tkg: Update linux59-rc-tkg to 5.9.0 release This is an initial release with a couple compilation fixes for Project C schedulers (until Alfred fixes it on his end). Fsync support is missing at this point in time. --- {linux59-rc-tkg => linux59-tkg}/PKGBUILD | 37 +- {linux59-rc-tkg => linux59-tkg}/README.md | 23 +- .../customization.cfg | 27 +- {linux59-rc-tkg => linux59-tkg}/install.sh | 48 +- .../linux59-tkg-config/90-cleanup.hook | 0 .../linux59-tkg-config/cleanup | 0 .../linux59-tkg-config/config.x86_64 | 133 +- .../generic-desktop-profile.cfg | 20 - .../linux59-tkg-config/prepare | 103 +- .../ryzen-desktop-profile.cfg | 20 - ...sallow-unprivileged-CLONE_NEWUSER-by.patch | 0 .../0002-clear-patches.patch | 98 +- .../0003-glitched-base.patch | 411 +- .../0003-glitched-cfs.patch | 0 .../0005-glitched-pds.patch | 76 - .../0006-add-acs-overrides_iommu.patch | 0 .../0009-glitched-bmq.patch | 90 + .../0009-glitched-ondemand-bmq.patch | 4 +- .../0009-prjc_v5.9-r0.patch | 5361 +++++++++-------- .../linux59-tkg-patches/0011-ZFS-fix.patch | 0 .../0012-misc-additions.patch | 0 21 files changed, 3430 insertions(+), 3021 deletions(-) rename {linux59-rc-tkg => linux59-tkg}/PKGBUILD (89%) rename {linux59-rc-tkg => linux59-tkg}/README.md (72%) rename {linux59-rc-tkg => linux59-tkg}/customization.cfg (91%) rename {linux59-rc-tkg => linux59-tkg}/install.sh (88%) rename {linux59-rc-tkg => linux59-tkg}/linux59-tkg-config/90-cleanup.hook (100%) rename {linux59-rc-tkg => linux59-tkg}/linux59-tkg-config/cleanup (100%) rename {linux59-rc-tkg => linux59-tkg}/linux59-tkg-config/config.x86_64 (99%) rename {linux59-rc-tkg => linux59-tkg}/linux59-tkg-config/generic-desktop-profile.cfg (66%) rename {linux59-rc-tkg => linux59-tkg}/linux59-tkg-config/prepare (95%) rename {linux59-rc-tkg => linux59-tkg}/linux59-tkg-config/ryzen-desktop-profile.cfg (69%) rename {linux59-rc-tkg => linux59-tkg}/linux59-tkg-patches/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch (100%) rename {linux59-rc-tkg => linux59-tkg}/linux59-tkg-patches/0002-clear-patches.patch (75%) rename {linux59-rc-tkg => linux59-tkg}/linux59-tkg-patches/0003-glitched-base.patch (62%) rename {linux59-rc-tkg => linux59-tkg}/linux59-tkg-patches/0003-glitched-cfs.patch (100%) rename {linux59-rc-tkg => linux59-tkg}/linux59-tkg-patches/0005-glitched-pds.patch (50%) rename {linux59-rc-tkg => linux59-tkg}/linux59-tkg-patches/0006-add-acs-overrides_iommu.patch (100%) create mode 100644 linux59-tkg/linux59-tkg-patches/0009-glitched-bmq.patch rename linux59-rc-tkg/linux59-tkg-patches/0005-glitched-ondemand-pds.patch => linux59-tkg/linux59-tkg-patches/0009-glitched-ondemand-bmq.patch (88%) rename linux59-rc-tkg/linux59-tkg-patches/0005-v5.9_undead-pds099o.patch => linux59-tkg/linux59-tkg-patches/0009-prjc_v5.9-r0.patch (73%) rename {linux59-rc-tkg => linux59-tkg}/linux59-tkg-patches/0011-ZFS-fix.patch (100%) rename {linux59-rc-tkg => linux59-tkg}/linux59-tkg-patches/0012-misc-additions.patch (100%) diff --git a/linux59-rc-tkg/PKGBUILD b/linux59-tkg/PKGBUILD similarity index 89% rename from linux59-rc-tkg/PKGBUILD rename to linux59-tkg/PKGBUILD index c5ac056..d3c43a8 100644 --- a/linux59-rc-tkg/PKGBUILD +++ b/linux59-tkg/PKGBUILD @@ -53,8 +53,8 @@ license=('GPL2') makedepends=('xmlto' 'docbook-xsl' 'kmod' 'inetutils' 'bc' 'libelf' 'pahole' 'patchutils' 'flex' 'python-sphinx' 'python-sphinx_rtd_theme' 'graphviz' 
'imagemagick' 'git') optdepends=('schedtool') options=('!strip' 'docs') -source=("https://git.kernel.org/torvalds/t/linux-${_basekernel}-${_sub}.tar.gz" - #"https://www.kernel.org/pub/linux/kernel/v5.x/patch-${pkgver}.xz" +source=("https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-${_basekernel}.tar.xz" + #"https://cdn.kernel.org/pub/linux/kernel/v5.x/patch-${pkgver}.xz" "https://raw.githubusercontent.com/graysky2/kernel_gcc_patch/master/enable_additional_cpu_optimizations_for_gcc_v10.1%2B_kernel_v5.8%2B.patch" 'config.x86_64' # stock Arch config #'config_hardened.x86_64' # hardened Arch config @@ -68,35 +68,38 @@ source=("https://git.kernel.org/torvalds/t/linux-${_basekernel}-${_sub}.tar.gz" 0003-glitched-cfs.patch #0004-glitched-ondemand-muqss.patch #0004-glitched-muqss.patch - #0004-5.9-ck1.patch - 0005-glitched-ondemand-pds.patch + #0004-5.8-ck1.patch + #0005-undead-glitched-ondemand-pds.patch + #0005-undead-glitched-pds.patch + #0005-v5.8_undead-pds099o.patch 0005-glitched-pds.patch - 0005-v5.9_undead-pds099o.patch 0006-add-acs-overrides_iommu.patch - #0007-v5.9-fsync.patch - #0008-5.9-bcachefs.patch - #0009-glitched-ondemand-bmq.patch - #0009-glitched-bmq.patch - #0009-prjc_v5.9-r0.patch + #0007-v5.8-fsync.patch + #0008-5.8-bcachefs.patch + 0009-glitched-ondemand-bmq.patch + 0009-glitched-bmq.patch + 0009-prjc_v5.9-r0.patch 0011-ZFS-fix.patch #0012-linux-hardened.patch 0012-misc-additions.patch ) -sha256sums=('fc5d8eae3949051d2d99c9425c234b580d7c20e816617f6cab928197ab55127a' +sha256sums=('3239a4ee1250bf2048be988cc8cb46c487b2c8a0de5b1b032d38394d5c6b1a06' '5ab29eb64e57df83b395a29a6a4f89030d142feffbfbf73b3afc6d97a2a7fd12' - '20da98426048a222adeaf6606c9695d7a36974f4110a5adbe77c482898b59348' + 'ca84d1966bf13570768a65015ddcbde198d866d2a5a44df21a581ed57860b887' '1e15fc2ef3fa770217ecc63a220e5df2ddbcf3295eb4a021171e7edd4c6cc898' '66a03c246037451a77b4d448565b1d7e9368270c7d02872fbd0b5d024ed0a997' 'f6383abef027fd9a430fd33415355e0df492cdc3c90e9938bf2d98f4f63b32e6' - 'd02bf5ca08fd610394b9d3a0c3b176d74af206f897dee826e5cbaec97bb4a4aa' - 'd6cde733ad3bd8287528f69c16debfa75e7e43e8bb4c26a8ca9e1a3a7e303a02' + '35a7cde86fb94939c0f25a62b8c47f3de0dbd3c65f876f460b263181b3e92fc0' + 'b9ebe0ae69bc2b2091d6bfcf6c7875a87ea7969fcfa4e306c48d47a60f9ef4d6' '7058e57fd68367b029adc77f2a82928f1433daaf02c8c279cb2d13556c8804d7' - '62496f9ca788996181ef145f96ad26291282fcc3fb95cdc04080dcf84365be33' - 'b5b0347c18c6217e074890fd109002586fc23f4a45dec89f72e72c5845e73db2' - 'd7726495963bf23ee54958e63b7e7aa00feb3281b10f58cae6567488e7ddc19f' + 'fca63d15ca4502aebd73e76d7499b243d2c03db71ff5ab0bf5cf268b2e576320' '19661ec0d39f9663452b34433214c755179894528bf73a42f6ba52ccf572832a' + '9fad4a40449e09522899955762c8928ae17f4cdaa16e01239fd12592e9d58177' + 'a557b342111849a5f920bbe1c129f3ff1fc1eff62c6bd6685e0972fc88e39911' + '83ddfd5db3068f47b64d331d411e7c76a8f7366e51112f73d3aa0198a4c2116b' '49262ce4a8089fa70275aad742fc914baa28d9c384f710c9a62f64796d13e104' '433b919e6a0be26784fb4304c43b1811a28f12ad3de9e26c0af827f64c0c316e') + export KBUILD_BUILD_HOST=archlinux export KBUILD_BUILD_USER=$pkgbase export KBUILD_BUILD_TIMESTAMP="$(date -Ru${SOURCE_DATE_EPOCH:+d @$SOURCE_DATE_EPOCH})" diff --git a/linux59-rc-tkg/README.md b/linux59-tkg/README.md similarity index 72% rename from linux59-rc-tkg/README.md rename to linux59-tkg/README.md index 0b33114..573fa0f 100644 --- a/linux59-rc-tkg/README.md +++ b/linux59-tkg/README.md @@ -1,15 +1,17 @@ **Due to intel_pstate poor performances as of late, I have decided to set it to passive mode to make use of the 
acpi_cpufreq governors passthrough, keeping full support for turbo frequencies.** -### PDS, MuQSS and BMQ are not yet available options for this revision -## Nvidia prop drivers need to be patched (https://github.com/Frogging-Family/nvidia-all can do that automatically for you) +### MuQSS is not an available option for this revision yet -A custom Linux kernel 5.9 RC with added tweaks for a nice interactivity/performance balance, aiming for the best gaming experience. +A custom Linux kernel 5.9.y with a selector for Undead PDS, Project C / PDS & BMQ CPU scheduler patchsets (stock CFS is also an option) and added tweaks for a nice interactivity/performance balance, aiming for the best gaming experience. + +- Project C / PDS & BMQ : http://cchalpha.blogspot.com/ Various personalization options available and userpatches support (put your own patches in the same dir as the PKGBUILD, with the ".mypatch" extension). The options built with are installed to `/usr/share/doc/$pkgbase/customization.cfg`, where `$pkgbase` is the package name. Comes with a slightly modified Arch config asking for a few core personalization settings at compilation time. + If you want to streamline your kernel config for lower footprint and faster compilations : https://wiki.archlinux.org/index.php/Modprobed-db -You can enable support for it at the beginning of the PKGBUILD file. Make sure to read everything you need to know about it. +You can optionally enable support for it at the beginning of the PKGBUILD file. **Make sure to read everything you need to know about it as there are big caveats making it NOT recommended for most users.** ## Other stuff included: - Graysky's per-CPU-arch native optimizations - https://github.com/graysky2/kernel_gcc_patch @@ -22,24 +24,20 @@ You can enable support for it at the beginning of the PKGBUILD file. Make sure t - using vm.max_map_count=524288 by default - cherry-picked clear linux patches - **optional** overrides for missing ACS capabilities -- **optional** ZFS fpu symbols - - -## Install procedure ## Install procedure ### DEB (Debian, Ubuntu and derivatives) and RPM (Fedora, SUSE and derivatives) based distributions ``` git clone https://github.com/Frogging-Family/linux-tkg.git -cd linux-tkg/linux59-rc-tkg +cd linux-tkg/linux59-tkg # Optional: edit customization.cfg file ./install.sh install ``` -Uninstalling custom kernels installed through the script has to be done -manually, the script can can help out with some useful information: +Uninstalling custom kernels installed through the script has to be done manually. +The script can help out with some useful information: ``` -cd path/to/linux-tkg/linux59-rc-tkg +cd path/to/linux-tkg/linux59-tkg ./install.sh uninstall-help ``` @@ -51,4 +49,3 @@ The command to do for that is: ``` ./install.sh config ``` - diff --git a/linux59-rc-tkg/customization.cfg b/linux59-tkg/customization.cfg similarity index 91% rename from linux59-rc-tkg/customization.cfg rename to linux59-tkg/customization.cfg index 4a2530e..21cb749 100644 --- a/linux59-rc-tkg/customization.cfg +++ b/linux59-tkg/customization.cfg @@ -1,13 +1,13 @@ # linux59-TkG config file -# Linux distribution you are using, options are "Arch", "Ubuntu", "Debian", "Fedora" or "Suse". -# It is automatically set to "Arch" when using PKGBUILD. +# Linux distribution you are using, options are "Arch", "Ubuntu", "Debian", "Fedora" or "Suse". +# It is automatically set to "Arch" when using PKGBUILD. 
# If left empty, the script will prompt _distro="" -#### MISC OPTIONS #### +#### MISC OPTIONS #### -# External config file to use - If the given file exists in path, it will override default config (customization.cfg) - Default is ~/.config/frogminer/linux52-tkg.cfg +# External config file to use - If the given file exists in path, it will override default config (customization.cfg) - Default is ~/.config/frogminer/linux59-tkg.cfg _EXT_CONFIG_PATH=~/.config/frogminer/linux59-tkg.cfg # [Arch specific] Set to anything else than "true" to limit cleanup operations and keep source and files generated during compilation. @@ -45,7 +45,8 @@ _diffconfig_name="" #### KERNEL OPTIONS #### -# [Arch specific] Name of the default config file to use from the linux???-tkg-config folder, use "distro" to use the config file of the kernel you are currently using. Arch default is "config.x86_64" and Arch hardened is "config_hardened.x86_64". +# [Arch specific] Name of the default config file to use from the linux???-tkg-config folder. +# Default is "config.x86_64" and hardened is "config_hardened.x86_64" (Arch-based). # To get a complete hardened setup, you have to use "cfs" as _cpusched _configfile="config.x86_64" @@ -54,9 +55,9 @@ _debugdisable="false" # LEAVE AN EMPTY VALUE TO BE PROMPTED ABOUT FOLLOWING OPTIONS AT BUILD TIME -# CPU scheduler - Options are "pds" or "cfs" -# "pds" is the recommended option for gaming -#_cpusched="" +# CPU scheduler - Options are "pds", "bmq" or "cfs" +# "upds" is the recommended option for gaming +_cpusched="" # CPU sched_yield_type - Choose what sort of yield sched_yield will perform # For PDS and MuQSS: 0: No yield. (Recommended option for gaming on PDS and MuQSS) @@ -65,7 +66,7 @@ _debugdisable="false" # For BMQ: 0: No yield. # 1: Deboost and requeue task. (Default) # 2: Set rq skip task. -_sched_yield_type="" +_sched_yield_type="0" # Round Robin interval is the longest duration two tasks with the same nice level will be delayed for. When CPU time is requested by a task, it receives a time slice equal # to the rr_interval in addition to a virtual deadline. When using yield_type 2, a low value can help offset the disadvantages of rescheduling a process that has yielded. @@ -97,15 +98,9 @@ _OFenable="false" # Set to "true" to use ACS override patch - https://wiki.archlinux.org/index.php/PCI_passthrough_via_OVMF#Bypassing_the_IOMMU_groups_.28ACS_override_patch.29 - Kernel default is "false" _acs_override="" -# Set to "true" to add Bcache filesystem support. You'll have to install bcachefs-tools-git from AUR for utilities - https://bcachefs.org/ - If in doubt, set to "false" -#_bcachefs="" - # Set to "true" to add back missing symbol for AES-NI/AVX support on ZFS - https://github.com/NixOS/nixpkgs/blob/master/pkgs/os-specific/linux/kernel/export_kernel_fpu_functions.patch - Kernel default is "false" _zfsfix="true" -# Set to "true" to enable support for fsync, an experimental replacement for esync found in Valve Proton 4.11+ - https://steamcommunity.com/games/221410/announcements/detail/2957094910196249305 -#_fsync="" - # A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience (ZENIFY) - Default is "true" _zenify="true" @@ -157,7 +152,7 @@ _custom_commandline="intel_pstate=passive" # !!! It will also change pkgname - If you don't explicitely need this, don't use it !!! _custom_pkgbase="" -# [non-Arch specific] Kernel localversion. 
Putting it to "Mario" will make for example the kernel version be 5.7.0-Mario (given by uname -r) +# [non-Arch specific] Kernel localversion. Putting it to "Mario" will make for example the kernel version be 5.7.0-tkg-Mario (given by uname -r) # If left empty, it will use -tkg-"${_cpusched}" where "${_cpusched}" will be replaced by the user chosen scheduler _kernel_localversion="" diff --git a/linux59-rc-tkg/install.sh b/linux59-tkg/install.sh similarity index 88% rename from linux59-rc-tkg/install.sh rename to linux59-tkg/install.sh index 0698d81..c8cfe5b 100755 --- a/linux59-rc-tkg/install.sh +++ b/linux59-tkg/install.sh @@ -21,18 +21,16 @@ set -e _where=`pwd` srcdir="$_where" -# This is an RC, so subver will always be 0 -_kernel_subver=0 source linux*-tkg-config/prepare -_cpu_opt_patch_link="https://raw.githubusercontent.com/graysky2/kernel_gcc_patch/master/enable_additional_cpu_optimizations_for_gcc_v10.1%2B_kernel_v5.8%2B.patch" +_cpu_opt_patch_link="https://raw.githubusercontent.com/graysky2/kernel_gcc_patch/master/enable_additional_cpu_optimizations_for_gcc_v10.1%2B_kernel_v5.8%2B.patch" source customization.cfg if [ "$1" != "install" ] && [ "$1" != "config" ] && [ "$1" != "uninstall-help" ]; then msg2 "Argument not recognised, options are: - - config : shallow clones the linux main git tree into the folder linux-main, then applies on it the extra patches and prepares the .config file + - config : shallow clones the linux ${_basekernel}.x git tree into the folder linux-${_basekernel}, then applies on it the extra patches and prepares the .config file by copying the one from the current linux system in /boot/config-`uname -r` and updates it. - install : [RPM and DEB based distros only], does the config step, proceeds to compile, then prompts to install - uninstall-help : [RPM and DEB based distros only], lists the installed kernels in this system, then gives a hint on how to uninstall them manually." 
@@ -99,29 +97,32 @@ if [ "$1" = "install" ] || [ "$1" = "config" ]; then _distro="" fi - if [ -d linux-main.orig ]; then - rm -rf linux-main.orig + if [ -d linux-${_basekernel}.orig ]; then + rm -rf linux-${_basekernel}.orig fi - if [ -d linux-main ]; then + if [ -d linux-${_basekernel} ]; then msg2 "Reseting files in linux-$_basekernel to their original state and getting latest updates" - cd "$_where"/linux-main - git reset --hard HEAD + cd "$_where"/linux-${_basekernel} + git checkout --force linux-$_basekernel.y git clean -f -d -x - git checkout master git pull - git checkout "v${_basekernel}-${_sub}" - msg2 "Done" + msg2 "Done" + cd "$_where" else - msg2 "Shallow git cloning linux kernel master branch" - # Shallow clone the past 3 weeks - _clone_start_date=$(date -d "$(date +"%Y/%m/%d") - 21 day" +"%Y/%m/%d") - git clone --branch master --single-branch --shallow-since=$_clone_start_date https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git "$_where"/linux-main - cd "$_where"/linux-main - git checkout "v${_basekernel}-${_sub}" + msg2 "Shallow git cloning linux $_basekernel" + git clone --branch linux-$_basekernel.y --single-branch --depth=1 https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git linux-${_basekernel} msg2 "Done" fi - cd "$_where" + + # Define current kernel subversion + if [ -z $_kernel_subver ]; then + cd "$_where"/linux-${_basekernel} + _kernelverstr=`git describe` + _kernel_subver=${_kernelverstr:5} + cd "$_where" + fi + # Run init script that is also run in PKGBUILD, it will define some env vars that we will use _tkg_initscript @@ -133,7 +134,7 @@ if [ "$1" = "install" ] || [ "$1" = "config" ]; then # Follow Ubuntu install isntructions in https://wiki.ubuntu.com/KernelTeam/GitKernelBuild # cd in linux folder, copy Ubuntu's current config file, update with new params - cd "$_where"/linux-main + cd "$_where"/linux-${_basekernel} msg2 "Copying current kernel's config and running make oldconfig..." cp /boot/config-`uname -r` .config @@ -203,7 +204,7 @@ if [ "$1" = "install" ]; then read -p "Do you want to install the new Kernel ? y/[n]: " _install if [[ $_install =~ [yY] ]] || [ $_install = "yes" ] || [ $_install = "Yes" ]; then cd "$_where" - _kernelname=$_basekernel.$_kernel_subver-$_sub-$_kernel_flavor + _kernelname=$_basekernel.$_kernel_subver-$_kernel_flavor _headers_deb="linux-headers-${_kernelname}*.deb" _image_deb="linux-image-${_kernelname}_*.deb" _kernel_devel_deb="linux-libc-dev_${_kernelname}*.deb" @@ -219,8 +220,7 @@ if [ "$1" = "install" ]; then # Se we can actually refer properly to the rpm files. _kernel_flavor=${_kernel_flavor//-/_} -# Doesn't seem to include -rc(x) by default, so will have to add it to EXTRAVERSION - if make -j ${_thread_num} rpm-pkg EXTRAVERSION="_${_sub}_${_kernel_flavor}"; then + if make -j ${_thread_num} rpm-pkg EXTRAVERSION="_${_kernel_flavor}"; then msg2 "Building successfully finished!" cd "$_where" @@ -237,7 +237,7 @@ if [ "$1" = "install" ]; then read -p "Do you want to install the new Kernel ? 
y/[n]: " _install if [ "$_install" = "y" ] || [ "$_install" = "Y" ] || [ "$_install" = "yes" ] || [ "$_install" = "Yes" ]; then - _kernelname=$_basekernel.${_kernel_subver}_${_sub}_$_kernel_flavor + _kernelname=$_basekernel.${_kernel_subver}_$_kernel_flavor _headers_rpm="kernel-headers-${_kernelname}*.rpm" _kernel_rpm="kernel-${_kernelname}*.rpm" _kernel_devel_rpm="kernel-devel-${_kernelname}*.rpm" diff --git a/linux59-rc-tkg/linux59-tkg-config/90-cleanup.hook b/linux59-tkg/linux59-tkg-config/90-cleanup.hook similarity index 100% rename from linux59-rc-tkg/linux59-tkg-config/90-cleanup.hook rename to linux59-tkg/linux59-tkg-config/90-cleanup.hook diff --git a/linux59-rc-tkg/linux59-tkg-config/cleanup b/linux59-tkg/linux59-tkg-config/cleanup similarity index 100% rename from linux59-rc-tkg/linux59-tkg-config/cleanup rename to linux59-tkg/linux59-tkg-config/cleanup diff --git a/linux59-rc-tkg/linux59-tkg-config/config.x86_64 b/linux59-tkg/linux59-tkg-config/config.x86_64 similarity index 99% rename from linux59-rc-tkg/linux59-tkg-config/config.x86_64 rename to linux59-tkg/linux59-tkg-config/config.x86_64 index e4944f2..9524eeb 100644 --- a/linux59-rc-tkg/linux59-tkg-config/config.x86_64 +++ b/linux59-tkg/linux59-tkg-config/config.x86_64 @@ -1,10 +1,10 @@ # # Automatically generated file; DO NOT EDIT. -# Linux/x86 5.9.0-rc4 Kernel Configuration +# Linux/x86 5.9.0 Kernel Configuration # -CONFIG_CC_VERSION_TEXT="gcc (TkG-mostlyportable) 10.2.1 20200730" +CONFIG_CC_VERSION_TEXT="gcc (GCC) 10.2.0" CONFIG_CC_IS_GCC=y -CONFIG_GCC_VERSION=100201 +CONFIG_GCC_VERSION=100200 CONFIG_LD_VERSION=235000000 CONFIG_CLANG_VERSION=0 CONFIG_CC_CAN_LINK=y @@ -44,7 +44,7 @@ CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y CONFIG_POSIX_MQUEUE_SYSCTL=y -# CONFIG_WATCH_QUEUE is not set +CONFIG_WATCH_QUEUE=y CONFIG_CROSS_MEMORY_ATTACH=y # CONFIG_USELIB is not set CONFIG_AUDIT=y @@ -141,7 +141,7 @@ CONFIG_RCU_FANOUT_LEAF=16 CONFIG_RCU_FAST_NO_HZ=y CONFIG_RCU_BOOST=y CONFIG_RCU_BOOST_DELAY=500 -CONFIG_RCU_NOCB_CPU=y +# CONFIG_RCU_NOCB_CPU is not set # CONFIG_TASKS_TRACE_RCU_READ_MB is not set # end of RCU Subsystem @@ -243,6 +243,7 @@ CONFIG_SHMEM=y CONFIG_AIO=y CONFIG_IO_URING=y CONFIG_ADVISE_SYSCALLS=y +CONFIG_HAVE_ARCH_USERFAULTFD_WP=y CONFIG_MEMBARRIER=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y @@ -253,7 +254,7 @@ CONFIG_BPF_SYSCALL=y CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_JIT_DEFAULT_ON=y -# CONFIG_USERFAULTFD is not set +CONFIG_USERFAULTFD=y CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE=y CONFIG_RSEQ=y # CONFIG_DEBUG_RSEQ is not set @@ -595,6 +596,7 @@ CONFIG_PMIC_OPREGION=y CONFIG_BYTCRC_PMIC_OPREGION=y CONFIG_CHTCRC_PMIC_OPREGION=y CONFIG_XPOWER_PMIC_OPREGION=y +CONFIG_BXT_WC_PMIC_OPREGION=y CONFIG_CHT_WC_PMIC_OPREGION=y CONFIG_CHT_DC_TI_PMIC_OPREGION=y CONFIG_ACPI_CONFIGFS=m @@ -1032,7 +1034,7 @@ CONFIG_THP_SWAP=y CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y # CONFIG_CMA is not set -# CONFIG_MEM_SOFT_DIRTY is not set +CONFIG_MEM_SOFT_DIRTY=y CONFIG_ZSWAP=y # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_DEFLATE is not set # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO is not set @@ -1699,7 +1701,7 @@ CONFIG_NET_SCH_ETS=m CONFIG_NET_SCH_DEFAULT=y # CONFIG_DEFAULT_FQ is not set # CONFIG_DEFAULT_CODEL is not set -# CONFIG_DEFAULT_FQ_CODEL is not set +CONFIG_DEFAULT_FQ_CODEL=y # CONFIG_DEFAULT_FQ_PIE is not set # CONFIG_DEFAULT_SFQ is not set # CONFIG_DEFAULT_PFIFO_FAST is not set @@ -2312,7 +2314,7 @@ CONFIG_RFD_FTL=m CONFIG_SSFDC=m CONFIG_SM_FTL=m CONFIG_MTD_OOPS=m -# CONFIG_MTD_PSTORE is not set 
+CONFIG_MTD_PSTORE=m CONFIG_MTD_SWAP=m CONFIG_MTD_PARTITIONED_MASTER=y @@ -2508,6 +2510,9 @@ CONFIG_XEN_BLKDEV_BACKEND=m CONFIG_VIRTIO_BLK=m CONFIG_BLK_DEV_RBD=m CONFIG_BLK_DEV_RSXX=m +CONFIG_BLK_DEV_RNBD=y +CONFIG_BLK_DEV_RNBD_CLIENT=m +CONFIG_BLK_DEV_RNBD_SERVER=m # # NVME Support @@ -3739,9 +3744,10 @@ CONFIG_MT76x2U=m CONFIG_MT7603E=m CONFIG_MT7615_COMMON=m CONFIG_MT7615E=m -# CONFIG_MT7663U is not set +CONFIG_MT7663_USB_SDIO_COMMON=m +CONFIG_MT7663U=m # CONFIG_MT7663S is not set -# CONFIG_MT7915E is not set +CONFIG_MT7915E=m CONFIG_WLAN_VENDOR_MICROCHIP=y CONFIG_WILC1000=m CONFIG_WILC1000_SDIO=m @@ -3805,10 +3811,12 @@ CONFIG_RTW88_CORE=m CONFIG_RTW88_PCI=m CONFIG_RTW88_8822B=m CONFIG_RTW88_8822C=m +CONFIG_RTW88_8723D=m +CONFIG_RTW88_8821C=m CONFIG_RTW88_8822BE=m CONFIG_RTW88_8822CE=m -# CONFIG_RTW88_8723DE is not set -# CONFIG_RTW88_8821CE is not set +CONFIG_RTW88_8723DE=m +CONFIG_RTW88_8821CE=m CONFIG_RTW88_DEBUG=y CONFIG_RTW88_DEBUGFS=y CONFIG_WLAN_VENDOR_RSI=y @@ -4294,7 +4302,6 @@ CONFIG_SERIAL_OF_PLATFORM=m # CONFIG_SERIAL_MAX3100=m CONFIG_SERIAL_MAX310X=m -# CONFIG_SERIAL_IMX_EARLYCON is not set CONFIG_SERIAL_UARTLITE=m CONFIG_SERIAL_UARTLITE_NR_UARTS=1 CONFIG_SERIAL_CORE=y @@ -4583,6 +4590,7 @@ CONFIG_SPI_TLE62X0=m CONFIG_SPI_SLAVE=y CONFIG_SPI_SLAVE_TIME=m CONFIG_SPI_SLAVE_SYSTEM_CONTROL=m +CONFIG_SPI_DYNAMIC=y CONFIG_SPMI=m CONFIG_HSI=m CONFIG_HSI_BOARDINFO=y @@ -4723,8 +4731,8 @@ CONFIG_GPIO_GW_PLD=m CONFIG_GPIO_MAX7300=m CONFIG_GPIO_MAX732X=m CONFIG_GPIO_PCA953X=m -# CONFIG_GPIO_PCA953X_IRQ is not set -# CONFIG_GPIO_PCA9570 is not set +CONFIG_GPIO_PCA953X_IRQ=y +CONFIG_GPIO_PCA9570=m CONFIG_GPIO_PCF857X=m CONFIG_GPIO_TPIC2810=m # end of I2C GPIO expanders @@ -4749,7 +4757,7 @@ CONFIG_GPIO_LP87565=m CONFIG_GPIO_MADERA=m CONFIG_GPIO_MAX77620=m CONFIG_GPIO_MAX77650=m -# CONFIG_GPIO_MSIC is not set +CONFIG_GPIO_MSIC=y CONFIG_GPIO_PALMAS=y CONFIG_GPIO_RC5T583=y CONFIG_GPIO_STMPE=y @@ -4764,6 +4772,7 @@ CONFIG_GPIO_TQMX86=m CONFIG_GPIO_TWL4030=m CONFIG_GPIO_TWL6040=m CONFIG_GPIO_UCB1400=m +CONFIG_GPIO_WHISKEY_COVE=m CONFIG_GPIO_WM831X=m CONFIG_GPIO_WM8350=m CONFIG_GPIO_WM8994=m @@ -4909,6 +4918,7 @@ CONFIG_CHARGER_MAX77650=m CONFIG_CHARGER_MAX77693=m CONFIG_CHARGER_MAX8997=m CONFIG_CHARGER_MAX8998=m +CONFIG_CHARGER_MP2629=m CONFIG_CHARGER_BQ2415X=m CONFIG_CHARGER_BQ24190=m CONFIG_CHARGER_BQ24257=m @@ -4963,7 +4973,7 @@ CONFIG_SENSORS_APPLESMC=m CONFIG_SENSORS_ASB100=m CONFIG_SENSORS_ASPEED=m CONFIG_SENSORS_ATXP1=m -# CONFIG_SENSORS_CORSAIR_CPRO is not set +CONFIG_SENSORS_CORSAIR_CPRO=m CONFIG_SENSORS_DRIVETEMP=m CONFIG_SENSORS_DS620=m CONFIG_SENSORS_DS1621=m @@ -4974,7 +4984,7 @@ CONFIG_SENSORS_I5K_AMB=m CONFIG_SENSORS_F71805F=m CONFIG_SENSORS_F71882FG=m CONFIG_SENSORS_F75375S=m -# CONFIG_SENSORS_GSC is not set +CONFIG_SENSORS_GSC=m CONFIG_SENSORS_MC13783_ADC=m CONFIG_SENSORS_FSCHMD=m CONFIG_SENSORS_FTSTEUTATES=m @@ -5175,6 +5185,7 @@ CONFIG_INT3406_THERMAL=m CONFIG_PROC_THERMAL_MMIO_RAPL=y # end of ACPI INT340X thermal drivers +CONFIG_INTEL_BXT_PMIC_THERMAL=m CONFIG_INTEL_PCH_THERMAL=m # end of Intel thermal drivers @@ -5346,10 +5357,10 @@ CONFIG_MFD_INTEL_QUARK_I2C_GPIO=m CONFIG_LPC_ICH=m CONFIG_LPC_SCH=m CONFIG_INTEL_SOC_PMIC=y -# CONFIG_INTEL_SOC_PMIC_BXTWC is not set +CONFIG_INTEL_SOC_PMIC_BXTWC=m CONFIG_INTEL_SOC_PMIC_CHTWC=y CONFIG_INTEL_SOC_PMIC_CHTDC_TI=m -# CONFIG_INTEL_SOC_PMIC_MRFLD is not set +CONFIG_INTEL_SOC_PMIC_MRFLD=m CONFIG_MFD_INTEL_LPSS=m CONFIG_MFD_INTEL_LPSS_ACPI=m CONFIG_MFD_INTEL_LPSS_PCI=m @@ -5483,7 +5494,7 @@ CONFIG_REGULATOR_BD71828=m 
CONFIG_REGULATOR_BD718XX=m CONFIG_REGULATOR_BD9571MWV=m CONFIG_REGULATOR_CPCAP=m -# CONFIG_REGULATOR_CROS_EC is not set +CONFIG_REGULATOR_CROS_EC=m CONFIG_REGULATOR_DA903X=m CONFIG_REGULATOR_DA9052=m CONFIG_REGULATOR_DA9055=m @@ -5492,7 +5503,7 @@ CONFIG_REGULATOR_DA9063=m CONFIG_REGULATOR_DA9210=m CONFIG_REGULATOR_DA9211=m CONFIG_REGULATOR_FAN53555=m -# CONFIG_REGULATOR_FAN53880 is not set +CONFIG_REGULATOR_FAN53880=m CONFIG_REGULATOR_GPIO=m CONFIG_REGULATOR_HI6421=m CONFIG_REGULATOR_HI6421V530=m @@ -5538,7 +5549,7 @@ CONFIG_REGULATOR_MT6323=m CONFIG_REGULATOR_MT6358=m CONFIG_REGULATOR_MT6397=m CONFIG_REGULATOR_PALMAS=m -# CONFIG_REGULATOR_PCA9450 is not set +CONFIG_REGULATOR_PCA9450=m CONFIG_REGULATOR_PCAP=m CONFIG_REGULATOR_PCF50633=m CONFIG_REGULATOR_PFUZE100=m @@ -5547,7 +5558,7 @@ CONFIG_REGULATOR_PV88080=m CONFIG_REGULATOR_PV88090=m CONFIG_REGULATOR_PWM=m CONFIG_REGULATOR_QCOM_SPMI=m -# CONFIG_REGULATOR_QCOM_USB_VBUS is not set +CONFIG_REGULATOR_QCOM_USB_VBUS=m CONFIG_REGULATOR_RC5T583=m CONFIG_REGULATOR_RK808=m CONFIG_REGULATOR_RN5T618=m @@ -5561,7 +5572,7 @@ CONFIG_REGULATOR_SLG51000=m CONFIG_REGULATOR_STPMIC1=m CONFIG_REGULATOR_SY8106A=m CONFIG_REGULATOR_SY8824X=m -# CONFIG_REGULATOR_SY8827N is not set +CONFIG_REGULATOR_SY8827N=m CONFIG_REGULATOR_TPS51632=m CONFIG_REGULATOR_TPS6105X=m CONFIG_REGULATOR_TPS62360=m @@ -5583,7 +5594,7 @@ CONFIG_REGULATOR_WM831X=m CONFIG_REGULATOR_WM8350=m CONFIG_REGULATOR_WM8400=m CONFIG_REGULATOR_WM8994=m -# CONFIG_REGULATOR_QCOM_LABIBB is not set +CONFIG_REGULATOR_QCOM_LABIBB=m CONFIG_RC_CORE=m CONFIG_RC_MAP=m CONFIG_LIRC=y @@ -5624,17 +5635,18 @@ CONFIG_IR_SERIAL=m CONFIG_IR_SERIAL_TRANSMITTER=y CONFIG_IR_SIR=m CONFIG_RC_XBOX_DVD=m -# CONFIG_IR_TOY is not set +CONFIG_IR_TOY=m CONFIG_CEC_CORE=m CONFIG_CEC_NOTIFIER=y CONFIG_CEC_PIN=y CONFIG_MEDIA_CEC_RC=y # CONFIG_CEC_PIN_ERROR_INJ is not set CONFIG_MEDIA_CEC_SUPPORT=y -# CONFIG_CEC_CH7322 is not set -# CONFIG_CEC_CROS_EC is not set +CONFIG_CEC_CH7322=m +CONFIG_CEC_CROS_EC=m CONFIG_CEC_GPIO=m -# CONFIG_CEC_SECO is not set +CONFIG_CEC_SECO=m +CONFIG_CEC_SECO_RC=y CONFIG_USB_PULSE8_CEC=m CONFIG_USB_RAINSHADOW_CEC=m CONFIG_MEDIA_SUPPORT=m @@ -6200,7 +6212,7 @@ CONFIG_VIDEO_S5C73M3=m CONFIG_VIDEO_AD5820=m CONFIG_VIDEO_AK7375=m CONFIG_VIDEO_DW9714=m -# CONFIG_VIDEO_DW9768 is not set +CONFIG_VIDEO_DW9768=m CONFIG_VIDEO_DW9807_VCM=m # end of Lens drivers @@ -6816,9 +6828,6 @@ CONFIG_HDMI=y # Console display driver support # CONFIG_VGA_CONSOLE=y -CONFIG_VGACON_SOFT_SCROLLBACK=y -CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64 -# CONFIG_VGACON_SOFT_SCROLLBACK_PERSISTENT_ENABLE_BY_DEFAULT is not set CONFIG_DUMMY_CONSOLE=y CONFIG_DUMMY_CONSOLE_COLUMNS=80 CONFIG_DUMMY_CONSOLE_ROWS=25 @@ -7948,6 +7957,7 @@ CONFIG_TYPEC_TCPM=m CONFIG_TYPEC_TCPCI=m CONFIG_TYPEC_RT1711H=m CONFIG_TYPEC_FUSB302=m +CONFIG_TYPEC_WCOVE=m CONFIG_TYPEC_UCSI=m CONFIG_UCSI_CCG=m CONFIG_UCSI_ACPI=m @@ -7958,7 +7968,7 @@ CONFIG_TYPEC_TPS6598X=m # USB Type-C Multiplexer/DeMultiplexer Switch support # CONFIG_TYPEC_MUX_PI3USB30532=m -# CONFIG_TYPEC_MUX_INTEL_PMC is not set +CONFIG_TYPEC_MUX_INTEL_PMC=m # end of USB Type-C Multiplexer/DeMultiplexer Switch support # @@ -8037,7 +8047,7 @@ CONFIG_MEMSTICK_REALTEK_USB=m CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_CLASS_FLASH=m -# CONFIG_LEDS_CLASS_MULTICOLOR is not set +CONFIG_LEDS_CLASS_MULTICOLOR=m CONFIG_LEDS_BRIGHTNESS_HW_CHANGED=y # @@ -8191,8 +8201,9 @@ CONFIG_INFINIBAND_SRP=m CONFIG_INFINIBAND_SRPT=m CONFIG_INFINIBAND_ISER=m CONFIG_INFINIBAND_ISERT=m -# CONFIG_INFINIBAND_RTRS_CLIENT is not set -# 
CONFIG_INFINIBAND_RTRS_SERVER is not set +CONFIG_INFINIBAND_RTRS=m +CONFIG_INFINIBAND_RTRS_CLIENT=m +CONFIG_INFINIBAND_RTRS_SERVER=m CONFIG_INFINIBAND_OPA_VNIC=m CONFIG_EDAC_ATOMIC_SCRUB=y CONFIG_EDAC_SUPPORT=y @@ -8476,7 +8487,8 @@ CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y CONFIG_VDPA=m CONFIG_VDPA_SIM=m CONFIG_IFCVF=m -# CONFIG_MLX5_VDPA is not set +CONFIG_MLX5_VDPA=y +CONFIG_MLX5_VDPA_NET=m CONFIG_VHOST_IOTLB=m CONFIG_VHOST_RING=m CONFIG_VHOST=m @@ -8702,7 +8714,17 @@ CONFIG_AD2S1210=m # CONFIG_FB_SM750 is not set CONFIG_STAGING_MEDIA=y -# CONFIG_INTEL_ATOMISP is not set +CONFIG_INTEL_ATOMISP=y +CONFIG_VIDEO_ATOMISP=m +CONFIG_VIDEO_ATOMISP_ISP2401=y +CONFIG_VIDEO_ATOMISP_OV5693=m +CONFIG_VIDEO_ATOMISP_OV2722=m +CONFIG_VIDEO_ATOMISP_GC2235=m +CONFIG_VIDEO_ATOMISP_MSRLIST_HELPER=m +CONFIG_VIDEO_ATOMISP_MT9M114=m +CONFIG_VIDEO_ATOMISP_GC0310=m +CONFIG_VIDEO_ATOMISP_OV2680=m +CONFIG_VIDEO_ATOMISP_LM3554=m CONFIG_VIDEO_IPU3_IMGU=m CONFIG_VIDEO_USBVISION=m @@ -8800,8 +8822,7 @@ CONFIG_THINKPAD_ACPI_ALSA_SUPPORT=y # CONFIG_THINKPAD_ACPI_UNSAFE_LEDS is not set CONFIG_THINKPAD_ACPI_VIDEO=y CONFIG_THINKPAD_ACPI_HOTKEY_POLL=y -# CONFIG_INTEL_ATOMISP2_LED is not set -CONFIG_INTEL_ATOMISP2_PM=m +CONFIG_INTEL_ATOMISP2_LED=m CONFIG_INTEL_CHT_INT33FE=m CONFIG_INTEL_HID_EVENT=m CONFIG_INTEL_INT0002_VGPIO=m @@ -8844,9 +8865,11 @@ CONFIG_INTEL_SPEED_SELECT_INTERFACE=m CONFIG_INTEL_TURBO_MAX_3=y CONFIG_INTEL_UNCORE_FREQ_CONTROL=m +CONFIG_INTEL_BXTWC_PMIC_TMU=m CONFIG_INTEL_CHTDC_TI_PWRBTN=m -# CONFIG_INTEL_MFLD_THERMAL is not set -# CONFIG_INTEL_MID_POWER_BUTTON is not set +CONFIG_INTEL_MFLD_THERMAL=m +CONFIG_INTEL_MID_POWER_BUTTON=m +CONFIG_INTEL_MRFLD_PWRBTN=m CONFIG_INTEL_PMC_CORE=y CONFIG_INTEL_PUNIT_IPC=m CONFIG_INTEL_SCU_IPC=y @@ -9051,6 +9074,7 @@ CONFIG_EXTCON_FSA9480=m CONFIG_EXTCON_GPIO=m CONFIG_EXTCON_INTEL_INT3496=m CONFIG_EXTCON_INTEL_CHT_WC=m +CONFIG_EXTCON_INTEL_MRFLD=m CONFIG_EXTCON_MAX14577=m CONFIG_EXTCON_MAX3355=m CONFIG_EXTCON_MAX77693=m @@ -9159,6 +9183,7 @@ CONFIG_ENVELOPE_DETECTOR=m CONFIG_HI8435=m CONFIG_HX711=m CONFIG_INA2XX_ADC=m +CONFIG_INTEL_MRFLD_ADC=m CONFIG_LP8788_ADC=m CONFIG_LTC2471=m CONFIG_LTC2485=m @@ -9174,7 +9199,7 @@ CONFIG_MCP320X=m CONFIG_MCP3422=m CONFIG_MCP3911=m CONFIG_MEN_Z188_ADC=m -# CONFIG_MP2629_ADC is not set +CONFIG_MP2629_ADC=m CONFIG_NAU7802=m CONFIG_PALMAS_GPADC=m CONFIG_QCOM_VADC_COMMON=m @@ -9390,8 +9415,9 @@ CONFIG_FXOS8700=m CONFIG_FXOS8700_I2C=m CONFIG_FXOS8700_SPI=m CONFIG_KMX61=m -# CONFIG_INV_ICM42600_I2C is not set -# CONFIG_INV_ICM42600_SPI is not set +CONFIG_INV_ICM42600=m +CONFIG_INV_ICM42600_I2C=m +CONFIG_INV_ICM42600_SPI=m CONFIG_INV_MPU6050_IIO=m CONFIG_INV_MPU6050_I2C=m CONFIG_INV_MPU6050_SPI=m @@ -9815,10 +9841,10 @@ CONFIG_SIOX=m CONFIG_SIOX_BUS_GPIO=m CONFIG_SLIMBUS=m CONFIG_SLIM_QCOM_CTRL=m -# CONFIG_INTERCONNECT is not set +CONFIG_INTERCONNECT=y CONFIG_COUNTER=m CONFIG_FTM_QUADDEC=m -# CONFIG_MICROCHIP_TCB_CAPTURE is not set +CONFIG_MICROCHIP_TCB_CAPTURE=m CONFIG_MOST=m # CONFIG_MOST_USB_HDM is not set # end of Device Drivers @@ -10223,6 +10249,7 @@ CONFIG_PERSISTENT_KEYRINGS=y CONFIG_TRUSTED_KEYS=m CONFIG_ENCRYPTED_KEYS=m CONFIG_KEY_DH_OPERATIONS=y +CONFIG_KEY_NOTIFICATIONS=y # CONFIG_SECURITY_DMESG_RESTRICT is not set CONFIG_SECURITY=y CONFIG_SECURITYFS=y @@ -10473,7 +10500,7 @@ CONFIG_CRYPTO_DEFLATE=m CONFIG_CRYPTO_LZO=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=y -CONFIG_CRYPTO_LZ4HC=y +CONFIG_CRYPTO_LZ4HC=m CONFIG_CRYPTO_ZSTD=y # @@ -10613,7 +10640,7 @@ CONFIG_ZLIB_DEFLATE=y CONFIG_LZO_COMPRESS=y CONFIG_LZO_DECOMPRESS=y 
CONFIG_LZ4_COMPRESS=y -CONFIG_LZ4HC_COMPRESS=y +CONFIG_LZ4HC_COMPRESS=m CONFIG_LZ4_DECOMPRESS=y CONFIG_ZSTD_COMPRESS=y CONFIG_ZSTD_DECOMPRESS=y @@ -10763,6 +10790,7 @@ CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set CONFIG_ARCH_HAS_UBSAN_SANITIZE_ALL=y # CONFIG_UBSAN is not set +CONFIG_HAVE_ARCH_KCSAN=y # end of Generic Kernel Debugging Instruments CONFIG_DEBUG_KERNEL=y @@ -10954,7 +10982,6 @@ CONFIG_HIST_TRIGGERS=y # CONFIG_HIST_TRIGGERS_DEBUG is not set # CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set # CONFIG_SAMPLES is not set -CONFIG_HAVE_ARCH_KCSAN=y CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y CONFIG_STRICT_DEVMEM=y CONFIG_IO_STRICT_DEVMEM=y diff --git a/linux59-rc-tkg/linux59-tkg-config/generic-desktop-profile.cfg b/linux59-tkg/linux59-tkg-config/generic-desktop-profile.cfg similarity index 66% rename from linux59-rc-tkg/linux59-tkg-config/generic-desktop-profile.cfg rename to linux59-tkg/linux59-tkg-config/generic-desktop-profile.cfg index 9716d59..9f33a13 100644 --- a/linux59-rc-tkg/linux59-tkg-config/generic-desktop-profile.cfg +++ b/linux59-tkg/linux59-tkg-config/generic-desktop-profile.cfg @@ -2,16 +2,8 @@ # Generic Desktop -#### MISC OPTIONS #### - -# External config file to use - If the given file exists in path, it will override default config (customization.cfg) - Default is ~/.config/frogminer/linux50-tkg.cfg -_EXT_CONFIG_PATH=~/.config/frogminer/linux57-tkg.cfg - #### KERNEL OPTIONS #### -# Name of the default config file to use from the linux???-tkg-config folder. Arch default is "config.x86_64". -_configfile="config.x86_64" - # Disable some non-module debugging - See PKGBUILD for the list _debugdisable="false" @@ -41,15 +33,3 @@ _runqueue_sharing="mc" # Timer frequency - "500", "750" or "1000" - More options available in kernel config prompt when left empty depending on selected cpusched - Kernel default is "750" _timer_freq="500" - - -#### USER PATCHES #### - -# You can use your own patches by putting them in the same folder as the PKGBUILD and giving them the .mypatch extension. -# You can also revert patches by putting them in the same folder as the PKGBUILD and giving them the .myrevert extension. - -# Also, userpatches variable below must be set to true for the above to work. -_user_patches="true" - -# Apply all user patches without confirmation - !!! NOT RECOMMENDED !!! -_user_patches_no_confirm="false" diff --git a/linux59-rc-tkg/linux59-tkg-config/prepare b/linux59-tkg/linux59-tkg-config/prepare similarity index 95% rename from linux59-rc-tkg/linux59-tkg-config/prepare rename to linux59-tkg/linux59-tkg-config/prepare index f84fc22..993975c 100644 --- a/linux59-rc-tkg/linux59-tkg-config/prepare +++ b/linux59-tkg/linux59-tkg-config/prepare @@ -2,7 +2,7 @@ _basever=59 _basekernel=5.9 -_sub=rc8 +_sub=0 _tkg_initscript() { @@ -31,35 +31,37 @@ _tkg_initscript() { fi # CPU SCHED selector - #if [ -z "$_cpusched" ] && [ ! -e "$_where"/cpuschedset ]; then - # plain "What CPU sched variant do you want to build/install?" - # read -rp "`echo $' > 1.PDS\n 2.CFS\nchoice[1-2?]: '`" CONDITION; - # if [ "$CONDITION" = "2" ]; then - # echo "_cpusched=\"cfs\"" > "$_where"/cpuschedset - # else - # echo "_cpusched=\"pds\"" > "$_where"/cpuschedset - # fi + if [ -z "$_cpusched" ] && [ ! -e "$_where"/cpuschedset ]; then + plain "What CPU sched variant do you want to build/install?" 
+ read -rp "`echo $' > 1.Project C / PDS\n 2.Project C / BMQ\n 3.CFS\nchoice[1-3?]: '`" CONDITION; + if [ "$CONDITION" = "2" ]; then + echo "_cpusched=\"bmq\"" > "$_where"/cpuschedset + elif [ "$CONDITION" = "3" ]; then + echo "_cpusched=\"cfs\"" > "$_where"/cpuschedset + else + echo "_cpusched=\"pds\"" > "$_where"/cpuschedset + fi if [ -n "$_custom_pkgbase" ]; then echo "_custom_pkgbase=\"${_custom_pkgbase}\"" >> "$_where"/cpuschedset fi - #elif [ "$_cpusched" = "muqss" ] || [ "$_cpusched" = "MuQSS" ]; then - # echo "_cpusched=\"MuQSS\"" > "$_where"/cpuschedset - #elif [ "$_cpusched" = "pds" ]; then - # echo "_cpusched=\"pds\"" > "$_where"/cpuschedset - #elif [ "$_cpusched" = "bmq" ]; then - # echo "_cpusched=\"bmq\"" > "$_where"/cpuschedset - #else - # if [ "$_nofallback" != "true" ]; then - # warning "Something is wrong with your cpusched selection. Do you want to fallback to CFS (default)?" - # read -rp "`echo $' > N/y : '`" _fallback; - # fi - # if [[ "$_fallback" =~ [yY] ]] || [ "$_nofallback" = "true" ]; then + elif [ "$_cpusched" = "pds" ]; then + echo "_cpusched=\"pds\"" > "$_where"/cpuschedset + elif [ "$_cpusched" = "cfs" ]; then + echo "_cpusched=\"cfs\"" > "$_where"/cpuschedset + elif [ "$_cpusched" = "bmq" ]; then + echo "_cpusched=\"bmq\"" > "$_where"/cpuschedset + else + if [ "$_nofallback" != "true" ]; then + warning "Something is wrong with your cpusched selection. Do you want to fallback to CFS (default)?" + read -rp "`echo $' > N/y : '`" _fallback; + fi + if [[ "$_fallback" =~ [yY] ]] || [ "$_nofallback" = "true" ]; then echo "_cpusched=\"cfs\"" > "$_where"/cpuschedset - # else - # error "Exiting..." - # exit 1 - # fi - #fi + else + error "Exiting..." + exit 1 + fi + fi source "$_where"/cpuschedset } @@ -117,7 +119,7 @@ _tkg_srcprep() { msg2 "Setting version..." 
scripts/setlocalversion --save-scmversion echo "-$pkgrel-tkg-${_cpusched}" > localversion.10-pkgrel - echo "" > localversion.20-pkgname + echo "" > localversion.20-pkgname # add upstream patch #msg2 "Patching from $_basekernel to $pkgver" @@ -164,7 +166,7 @@ _tkg_srcprep() { # MuQSS msg2 "Applying MuQSS base patch" patch -Np1 -i "$srcdir"/0004-5.9-ck1.patch - + if [ "${_aggressive_ondemand}" = "true" ]; then msg2 "Applying MuQSS agressive ondemand governor patch" patch -Np1 -i "$srcdir"/0004-glitched-ondemand-muqss.patch @@ -172,24 +174,24 @@ _tkg_srcprep() { msg2 "Applying Glitched MuQSS patch" patch -Np1 -i "$srcdir"/0004-glitched-muqss.patch - + elif [ "${_cpusched}" = "pds" ]; then # PDS-mq msg2 "Applying PDS base patch" - patch -Np1 -i "$srcdir"/0005-v5.9_undead-pds099o.patch + patch -Np1 -i "$srcdir"/0009-prjc_v5.9-r0.patch if [ "${_aggressive_ondemand}" = "true" ]; then msg2 "Applying PDS agressive ondemand governor patch" - patch -Np1 -i "$srcdir"/0005-glitched-ondemand-pds.patch + patch -Np1 -i "$srcdir"/0009-glitched-ondemand-bmq.patch fi msg2 "Applying Glitched PDS patch" patch -Np1 -i "$srcdir"/0005-glitched-pds.patch - + elif [ "${_cpusched}" = "bmq" ]; then # Project C / BMQ msg2 "Applying Project C / BMQ base patch" - + patch -Np1 -i "$srcdir"/0009-prjc_v5.9-r0.patch if [ "${_aggressive_ondemand}" = "true" ]; then @@ -211,7 +213,7 @@ _tkg_srcprep() { fi cat "${srcdir}/${_configfile}" > ./.config - fi + fi # Set some -tkg defaults @@ -227,12 +229,17 @@ _tkg_srcprep() { sed -i -e 's/CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO=y/# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO is not set/' ./.config sed -i -e 's/# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4 is not set/CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4=y/' ./.config sed -i -e 's/CONFIG_ZSWAP_COMPRESSOR_DEFAULT="lzo"/CONFIG_ZSWAP_COMPRESSOR_DEFAULT="lz4"/' ./.config - #sed -i -e 's/CONFIG_RCU_BOOST_DELAY=500/CONFIG_RCU_BOOST_DELAY=0/' ./.config + sed -i -e 's/CONFIG_RCU_BOOST_DELAY=500/CONFIG_RCU_BOOST_DELAY=0/' ./.config sed -i -e 's/# CONFIG_CMDLINE_BOOL is not set/CONFIG_CMDLINE_BOOL=y/' ./.config echo "CONFIG_CMDLINE=\"${_custom_commandline}\"" >> ./.config echo "# CONFIG_CMDLINE_OVERRIDE is not set" >> ./.config echo "# CONFIG_X86_P6_NOP is not set" >> ./.config - if [ "$_noccache" != "true" ]; then + + # openrgb + echo "CONFIG_I2C_NCT6775=m" >> ./.config + + # ccache fix + if [ "$_noccache" != "true" ]; then if { [ "$_distro" = "Arch" ] && pacman -Qq ccache &> /dev/null; } || { [ "$_distro" = "Ubuntu" ] && dpkg -l ccache > /dev/null; }; then sed -i -e 's/CONFIG_GCC_PLUGINS=y/# CONFIG_GCC_PLUGINS is not set/' ./.config fi @@ -293,10 +300,14 @@ _tkg_srcprep() { echo "CONFIG_SCHED_MUQSS=y" >> ./.config elif [ "${_cpusched}" = "pds" ]; then # PDS default config + echo "CONFIG_SCHED_ALT=y" >> ./.config echo "CONFIG_SCHED_PDS=y" >> ./.config + echo "# CONFIG_SCHED_BMQ is not set" >> ./.config elif [ "${_cpusched}" = "bmq" ]; then # BMQ default config echo "CONFIG_SCHED_ALT=y" >> ./.config + echo "CONFIG_SCHED_BMQ=y" >> ./.config + echo "# CONFIG_SCHED_PDS is not set" >> ./.config fi if [ "${_cpusched}" = "MuQSS" ] || [ "${_cpusched}" = "pds" ] || [ "${_cpusched}" = "bmq" ]; then @@ -326,7 +337,7 @@ _tkg_srcprep() { fi fi if [ "$CONDITION0" = "0" ]; then - if [ "${_cpusched}" = "bmq" ]; then + if [ "${_cpusched}" = "bmq" ] || [ "${_cpusched}" = "pds" ]; then sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 0;/' ./kernel/sched/alt_core.c else sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type 
__read_mostly = 0;/' ./kernel/sched/"${_cpusched}".c @@ -334,7 +345,7 @@ _tkg_srcprep() { elif [ "$CONDITION0" = "1" ]; then msg2 "Using default CPU sched yield type (1)" elif [ "$CONDITION0" = "2" ]; then - if [ "${_cpusched}" = "bmq" ]; then + if [ "${_cpusched}" = "bmq" ] || [ "${_cpusched}" = "pds" ]; then sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 2;/' ./kernel/sched/alt_core.c else sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 2;/' ./kernel/sched/"${_cpusched}".c @@ -342,7 +353,7 @@ _tkg_srcprep() { else if [ "${_cpusched}" = "MuQSS" ]; then msg2 "Using default CPU sched yield type (1)" - elif [ "${_cpusched}" = "bmq" ]; then + elif [ "${_cpusched}" = "bmq" ] || [ "${_cpusched}" = "pds" ]; then sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 0;/' ./kernel/sched/alt_core.c else sed -i -e 's/int sched_yield_type __read_mostly = 1;/int sched_yield_type __read_mostly = 0;/' ./kernel/sched/"${_cpusched}".c @@ -385,13 +396,11 @@ _tkg_srcprep() { if [ "$_rrvalue" != "default" ]; then if [ "${_cpusched}" = "MuQSS" ]; then sed -i -e "s/int rr_interval __read_mostly = 6;/int rr_interval __read_mostly = ${_rrvalue};/" ./kernel/sched/"${_cpusched}".c - elif [ "${_cpusched}" = "pds" ]; then - sed -i -e "s/#define SCHED_DEFAULT_RR (4)/#define SCHED_DEFAULT_RR (${_rrvalue})/" ./kernel/sched/"${_cpusched}".c - elif [ "${_cpusched}" = "bmq" ]; then + elif [ "${_cpusched}" = "bmq" ] || [ "${_cpusched}" = "pds" ]; then sed -i -e "s/u64 sched_timeslice_ns __read_mostly = (4 * 1000 * 1000);/u64 sched_timeslice_ns __read_mostly = (${_rrvalue} * 1000 * 1000);/" ./kernel/sched/alt_core.c fi else - if [ "${_cpusched}" = "bmq" ]; then + if [ "${_cpusched}" = "bmq" ] || [ "${_cpusched}" = "pds" ]; then sed -i -e "s/u64 sched_timeslice_ns __read_mostly = (4 * 1000 * 1000);/u64 sched_timeslice_ns __read_mostly = (2 * 1000 * 1000);/" ./kernel/sched/alt_core.c fi fi @@ -727,7 +736,7 @@ _tkg_srcprep() { #fi #if [[ "$CONDITION8" =~ [yY] ]] || [ "$_bcachefs" = "true" ]; then # msg2 "Patching Bcache filesystem support override" - # patch -Np1 -i "$srcdir"/0008-5.9-bcachefs.patch + # patch -Np1 -i "$srcdir"/0008-5.8-bcachefs.patch # echo "CONFIG_BCACHEFS_FS=m" >> ./.config # echo "CONFIG_BCACHEFS_QUOTA=y" >> ./.config # echo "CONFIG_BCACHEFS_POSIX_ACL=y" >> ./.config @@ -745,7 +754,7 @@ _tkg_srcprep() { #fi #if [[ "$CONDITION9" =~ [yY] ]] || [ "$_fsync" = "true" ]; then # msg2 "Patching Fsync support" - # patch -Np1 -i "$srcdir"/0007-v5.9-fsync.patch + # patch -Np1 -i "$srcdir"/0007-v5.8-fsync.patch #fi # ZFS fix @@ -790,7 +799,7 @@ _tkg_srcprep() { # get kernel version make prepare fi - + # modprobed-db if [ -z "$_modprobeddb" ]; then plain "" @@ -965,7 +974,7 @@ exit_cleanup() { rm -f "$srcdir"/linux-${_basekernel}/lib/sradix-tree.c rm -f "$srcdir"/linux-${_basekernel}/mm/uksm.c fi - + remove_deps fi diff --git a/linux59-rc-tkg/linux59-tkg-config/ryzen-desktop-profile.cfg b/linux59-tkg/linux59-tkg-config/ryzen-desktop-profile.cfg similarity index 69% rename from linux59-rc-tkg/linux59-tkg-config/ryzen-desktop-profile.cfg rename to linux59-tkg/linux59-tkg-config/ryzen-desktop-profile.cfg index 3057aad..510d06e 100644 --- a/linux59-rc-tkg/linux59-tkg-config/ryzen-desktop-profile.cfg +++ b/linux59-tkg/linux59-tkg-config/ryzen-desktop-profile.cfg @@ -2,16 +2,8 @@ # Ryzen Desktop -#### MISC OPTIONS #### - -# External config file to use - If the given file exists in path, it will override 
default config (customization.cfg) - Default is ~/.config/frogminer/linux52-tkg.cfg -_EXT_CONFIG_PATH=~/.config/frogminer/linux57-tkg.cfg - #### KERNEL OPTIONS #### -# Name of the default config file to use from the linux???-tkg-config folder. Arch default is "config.x86_64". -_configfile="config.x86_64" - # Disable some non-module debugging - See PKGBUILD for the list _debugdisable="false" @@ -44,15 +36,3 @@ _timer_freq="500" # Default CPU governor - "performance", "ondemand" (tweaked), "schedutil" or leave empty for default (schedutil on AMD and legacy Intel, intel_pstate on modern Intel) - Enforcing an option will disable intel_pstate altogether! _default_cpu_gov="performance" - - -#### USER PATCHES #### - -# You can use your own patches by putting them in the same folder as the PKGBUILD and giving them the .mypatch extension. -# You can also revert patches by putting them in the same folder as the PKGBUILD and giving them the .myrevert extension. - -# Also, userpatches variable below must be set to true for the above to work. -_user_patches="true" - -# Apply all user patches without confirmation - !!! NOT RECOMMENDED !!! -_user_patches_no_confirm="false" diff --git a/linux59-rc-tkg/linux59-tkg-patches/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch b/linux59-tkg/linux59-tkg-patches/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch similarity index 100% rename from linux59-rc-tkg/linux59-tkg-patches/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch rename to linux59-tkg/linux59-tkg-patches/0001-add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by.patch diff --git a/linux59-rc-tkg/linux59-tkg-patches/0002-clear-patches.patch b/linux59-tkg/linux59-tkg-patches/0002-clear-patches.patch similarity index 75% rename from linux59-rc-tkg/linux59-tkg-patches/0002-clear-patches.patch rename to linux59-tkg/linux59-tkg-patches/0002-clear-patches.patch index a7c9d4a..22a32f5 100644 --- a/linux59-rc-tkg/linux59-tkg-patches/0002-clear-patches.patch +++ b/linux59-tkg/linux59-tkg-patches/0002-clear-patches.patch @@ -1,4 +1,4 @@ -From 2ac70785613ef4c6b16414986bb18bd7b60d2a13 Mon Sep 17 00:00:00 2001 +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 14 Mar 2016 11:10:58 -0600 Subject: [PATCH] pci pme wakeups @@ -10,10 +10,10 @@ boards (sadly, too many of them) in laptops. 
1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c -index c25acace7d91..0ddebdad9f5b 100644 +index c9338f9..6974fbf 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c -@@ -61,7 +61,7 @@ struct pci_pme_device { +@@ -62,7 +62,7 @@ struct pci_pme_device { struct pci_dev *dev; }; @@ -23,9 +23,9 @@ index c25acace7d91..0ddebdad9f5b 100644 static void pci_dev_d3_sleep(struct pci_dev *dev) { -- -2.20.1 +https://clearlinux.org -From 7e7e36c67aa71d6a1ec5676d99d37c1fea389ceb Mon Sep 17 00:00:00 2001 +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 19 Mar 2016 21:32:19 -0400 Subject: [PATCH] intel_idle: tweak cpuidle cstates @@ -41,19 +41,19 @@ performance while keeping power efficiency 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c -index 8b5d85c91e9d..5e2d813a048d 100644 +index f449584..c994d24 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c -@@ -466,7 +466,7 @@ static struct cpuidle_state hsw_cstates[] = { +@@ -531,7 +531,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, - .target_residency = 20, + .target_residency = 120, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { -@@ -474,7 +474,7 @@ static struct cpuidle_state hsw_cstates[] = { +@@ -539,7 +539,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 33, @@ -62,7 +62,7 @@ index 8b5d85c91e9d..5e2d813a048d 100644 .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { -@@ -482,7 +482,7 @@ static struct cpuidle_state hsw_cstates[] = { +@@ -547,7 +547,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 133, @@ -71,7 +71,7 @@ index 8b5d85c91e9d..5e2d813a048d 100644 .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { -@@ -490,7 +490,7 @@ static struct cpuidle_state hsw_cstates[] = { +@@ -555,7 +555,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x32", .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 166, @@ -80,7 +80,7 @@ index 8b5d85c91e9d..5e2d813a048d 100644 .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { -@@ -498,7 +498,7 @@ static struct cpuidle_state hsw_cstates[] = { +@@ -563,7 +563,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 300, @@ -89,7 +89,7 @@ index 8b5d85c91e9d..5e2d813a048d 100644 .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { -@@ -506,7 +506,7 @@ static struct cpuidle_state hsw_cstates[] = { +@@ -571,7 +571,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x50", .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 600, @@ -98,7 +98,7 @@ index 8b5d85c91e9d..5e2d813a048d 100644 .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { -@@ -514,7 +514,7 @@ static struct cpuidle_state hsw_cstates[] = { +@@ -579,7 +579,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 2600, @@ -107,16 +107,16 @@ index 8b5d85c91e9d..5e2d813a048d 100644 .enter = 
&intel_idle, .enter_s2idle = intel_idle_s2idle, }, { -@@ -534,7 +534,7 @@ static struct cpuidle_state bdw_cstates[] = { +@@ -599,7 +599,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, - .target_residency = 20, + .target_residency = 120, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { -@@ -542,7 +542,7 @@ static struct cpuidle_state bdw_cstates[] = { +@@ -607,7 +607,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 40, @@ -125,7 +125,7 @@ index 8b5d85c91e9d..5e2d813a048d 100644 .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { -@@ -550,7 +550,7 @@ static struct cpuidle_state bdw_cstates[] = { +@@ -615,7 +615,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 133, @@ -134,7 +134,7 @@ index 8b5d85c91e9d..5e2d813a048d 100644 .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { -@@ -558,7 +558,7 @@ static struct cpuidle_state bdw_cstates[] = { +@@ -623,7 +623,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x32", .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 166, @@ -143,7 +143,7 @@ index 8b5d85c91e9d..5e2d813a048d 100644 .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { -@@ -566,7 +566,7 @@ static struct cpuidle_state bdw_cstates[] = { +@@ -631,7 +631,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 300, @@ -152,7 +152,7 @@ index 8b5d85c91e9d..5e2d813a048d 100644 .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { -@@ -574,7 +574,7 @@ static struct cpuidle_state bdw_cstates[] = { +@@ -639,7 +639,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x50", .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 600, @@ -161,7 +161,7 @@ index 8b5d85c91e9d..5e2d813a048d 100644 .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { -@@ -582,7 +582,7 @@ static struct cpuidle_state bdw_cstates[] = { +@@ -647,7 +647,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 2600, @@ -170,16 +170,16 @@ index 8b5d85c91e9d..5e2d813a048d 100644 .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { -@@ -603,7 +603,7 @@ static struct cpuidle_state skl_cstates[] = { +@@ -668,7 +668,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, - .target_residency = 20, + .target_residency = 120, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { -@@ -611,7 +611,7 @@ static struct cpuidle_state skl_cstates[] = { +@@ -676,7 +676,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 70, @@ -188,7 +188,7 @@ index 8b5d85c91e9d..5e2d813a048d 100644 .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { -@@ -619,7 +619,7 @@ static struct cpuidle_state skl_cstates[] = { +@@ -684,7 +684,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x20", .flags = 
MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 85, @@ -197,7 +197,7 @@ index 8b5d85c91e9d..5e2d813a048d 100644 .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { -@@ -627,7 +627,7 @@ static struct cpuidle_state skl_cstates[] = { +@@ -692,7 +692,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x33", .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 124, @@ -206,7 +206,7 @@ index 8b5d85c91e9d..5e2d813a048d 100644 .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { -@@ -635,7 +635,7 @@ static struct cpuidle_state skl_cstates[] = { +@@ -700,7 +700,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 200, @@ -215,7 +215,7 @@ index 8b5d85c91e9d..5e2d813a048d 100644 .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { -@@ -643,7 +643,7 @@ static struct cpuidle_state skl_cstates[] = { +@@ -708,7 +708,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x50", .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 480, @@ -224,7 +224,7 @@ index 8b5d85c91e9d..5e2d813a048d 100644 .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { -@@ -651,7 +651,7 @@ static struct cpuidle_state skl_cstates[] = { +@@ -716,7 +716,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 890, @@ -233,9 +233,9 @@ index 8b5d85c91e9d..5e2d813a048d 100644 .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { -@@ -672,7 +672,7 @@ static struct cpuidle_state skx_cstates[] = { +@@ -737,7 +737,7 @@ static struct cpuidle_state skx_cstates[] __initdata = { .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, - .target_residency = 20, + .target_residency = 300, @@ -243,9 +243,9 @@ index 8b5d85c91e9d..5e2d813a048d 100644 .enter_s2idle = intel_idle_s2idle, }, { -- -2.20.1 +https://clearlinux.org -From b8211d4f79dd88dfc2d4bd52be46103ea0b70e3e Mon Sep 17 00:00:00 2001 +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 6 Jan 2017 15:34:09 +0000 Subject: [PATCH] ipv4/tcp: allow the memory tuning for tcp to go a little @@ -256,10 +256,10 @@ Subject: [PATCH] ipv4/tcp: allow the memory tuning for tcp to go a little 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c -index cf3c5095c10e..b30d51837b2d 100644 +index 30c1142..4345075 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c -@@ -3897,8 +3897,8 @@ void __init tcp_init(void) +@@ -4201,8 +4201,8 @@ void __init tcp_init(void) tcp_init_mem(); /* Set per-socket limits to no more than 1/128 the pressure threshold */ limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); @@ -271,9 +271,9 @@ index cf3c5095c10e..b30d51837b2d 100644 init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024; -- -2.20.1 +https://clearlinux.org -From 050223869257b87e22636158a80da38d877248ed Mon Sep 17 00:00:00 2001 +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 18 Feb 2018 23:35:41 +0000 Subject: [PATCH] locking: rwsem: spin faster @@ -284,10 +284,10 @@ tweak rwsem owner spinning a bit 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c -index 
eef04551eae7..1ec5ab4c8ff7 100644 +index f11b9bd..1bbfcc1 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c -@@ -720,6 +720,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable) +@@ -717,6 +717,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable) struct task_struct *new, *owner; unsigned long flags, new_flags; enum owner_state state; @@ -295,7 +295,7 @@ index eef04551eae7..1ec5ab4c8ff7 100644 owner = rwsem_owner_flags(sem, &flags); state = rwsem_owner_state(owner, flags, nonspinnable); -@@ -753,7 +754,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable) +@@ -750,7 +751,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable) break; } @@ -305,10 +305,13 @@ index eef04551eae7..1ec5ab4c8ff7 100644 } rcu_read_unlock(); -From b836ea320114643d4354b43acb6ec8bb06ada487 Mon Sep 17 00:00:00 2001 +-- +https://clearlinux.org + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 2 Jun 2016 23:36:32 -0500 -Subject: [PATCH] drivers: Initialize ata before graphics +Subject: [PATCH] initialize ata before graphics ATA init is the long pole in the boot process, and its asynchronous. move the graphics init after it so that ata and graphics initialize @@ -318,10 +321,10 @@ in parallel 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/Makefile b/drivers/Makefile -index aaef17cc6512..d08f3a394929 100644 +index c0cd1b9..af1e2fb 100644 --- a/drivers/Makefile +++ b/drivers/Makefile -@@ -58,15 +58,8 @@ obj-y += char/ +@@ -59,15 +59,8 @@ obj-y += char/ # iommu/ comes before gpu as gpu are using iommu controllers obj-y += iommu/ @@ -337,7 +340,7 @@ index aaef17cc6512..d08f3a394929 100644 obj-$(CONFIG_PARPORT) += parport/ obj-$(CONFIG_NVM) += lightnvm/ obj-y += base/ block/ misc/ mfd/ nfc/ -@@ -79,6 +72,14 @@ obj-$(CONFIG_IDE) += ide/ +@@ -80,6 +73,14 @@ obj-$(CONFIG_IDE) += ide/ obj-y += scsi/ obj-y += nvme/ obj-$(CONFIG_ATA) += ata/ @@ -352,3 +355,6 @@ index aaef17cc6512..d08f3a394929 100644 obj-$(CONFIG_TARGET_CORE) += target/ obj-$(CONFIG_MTD) += mtd/ obj-$(CONFIG_SPI) += spi/ +-- +https://clearlinux.org + diff --git a/linux59-rc-tkg/linux59-tkg-patches/0003-glitched-base.patch b/linux59-tkg/linux59-tkg-patches/0003-glitched-base.patch similarity index 62% rename from linux59-rc-tkg/linux59-tkg-patches/0003-glitched-base.patch rename to linux59-tkg/linux59-tkg-patches/0003-glitched-base.patch index 4de65d6..fb09b35 100644 --- a/linux59-rc-tkg/linux59-tkg-patches/0003-glitched-base.patch +++ b/linux59-tkg/linux59-tkg-patches/0003-glitched-base.patch @@ -1,13 +1,17 @@ From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 From: Tk-Glitch Date: Wed, 4 Jul 2018 04:30:08 +0200 -Subject: glitched +Subject: [PATCH 01/17] glitched + +--- + scripts/mkcompile_h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h -index 87f1fc9..b3be470 100755 +index baf3ab8d9d49..854e32e6aec7 100755 --- a/scripts/mkcompile_h +++ b/scripts/mkcompile_h -@@ -50,8 +50,8 @@ else +@@ -41,8 +41,8 @@ else fi UTS_VERSION="#$VERSION" @@ -16,13 +20,28 @@ index 87f1fc9..b3be470 100755 +CONFIG_FLAGS="TKG" +if [ -n "$SMP" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS SMP"; fi if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi - UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP" + if [ -n "$PREEMPT_RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT_RT"; fi + +-- +2.28.0 + + +From 
c304f43d14e98d4bf1215fc10bc5012f554bdd8a Mon Sep 17 00:00:00 2001 +From: Alexandre Frade +Date: Mon, 29 Jan 2018 16:59:22 +0000 +Subject: [PATCH 02/17] dcache: cache_pressure = 50 decreases the rate at which + VFS caches are reclaimed + +Signed-off-by: Alexandre Frade +--- + fs/dcache.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/dcache.c b/fs/dcache.c -index 2acfc69878f5..3f1131431e06 100644 +index 361ea7ab30ea..0c5cf69b241a 100644 --- a/fs/dcache.c +++ b/fs/dcache.c -@@ -69,7 +69,7 @@ +@@ -71,7 +71,7 @@ * If no ancestor relationship: * arbitrary, since it's serialized on rename_lock */ @@ -31,11 +50,26 @@ index 2acfc69878f5..3f1131431e06 100644 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); +-- +2.28.0 + + +From 28f32f59d9d55ac7ec3a20b79bdd02d2a0a5f7e1 Mon Sep 17 00:00:00 2001 +From: Alexandre Frade +Date: Mon, 29 Jan 2018 18:29:13 +0000 +Subject: [PATCH 03/17] sched/core: nr_migrate = 128 increases number of tasks + to iterate in a single balance run. + +Signed-off-by: Alexandre Frade +--- + kernel/sched/core.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 211890edf37e..37121563407d 100644 +index f788cd61df21..2bfbb4213707 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -41,7 +41,7 @@ const_debug unsigned int sysctl_sched_features = +@@ -59,7 +59,7 @@ const_debug unsigned int sysctl_sched_features = * Number of tasks to iterate in a single balance run. * Limited because this is done with IRQs disabled. */ @@ -43,8 +77,8 @@ index 211890edf37e..37121563407d 100644 +const_debug unsigned int sysctl_sched_nr_migrate = 128; /* - * period over which we average the RT time consumption, measured -@@ -61,9 +61,9 @@ __read_mostly int scheduler_running; + * period over which we measure -rt task CPU usage in us. +@@ -71,9 +71,9 @@ __read_mostly int scheduler_running; /* * part of the period that we allow rt tasks to run in us. @@ -56,8 +90,22 @@ index 211890edf37e..37121563407d 100644 /* * __task_rq_lock - lock the rq @p resides on. +-- +2.28.0 + + +From acc49f33a10f61dc66c423888cbb883ba46710e4 Mon Sep 17 00:00:00 2001 +From: Alexandre Frade +Date: Mon, 29 Jan 2018 17:41:29 +0000 +Subject: [PATCH 04/17] scripts: disable the localversion "+" tag of a git repo + +Signed-off-by: Alexandre Frade +--- + scripts/setlocalversion | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + diff --git a/scripts/setlocalversion b/scripts/setlocalversion -index 71f39410691b..288f9679e883 100755 +index 20f2efd57b11..0552d8b9f582 100755 --- a/scripts/setlocalversion +++ b/scripts/setlocalversion @@ -54,7 +54,7 @@ scm_version() @@ -69,18 +117,32 @@ index 71f39410691b..288f9679e883 100755 return fi # If we are past a tagged commit (like +-- +2.28.0 -From f85ed068b4d0e6c31edce8574a95757a60e58b87 Mon Sep 17 00:00:00 2001 -From: Etienne Juvigny -Date: Mon, 3 Sep 2018 17:36:25 +0200 -Subject: Zenify & stuff +From 61fcb33fb0de8bc0f060e0a1ada38ed149217f4d Mon Sep 17 00:00:00 2001 +From: Oleksandr Natalenko +Date: Wed, 11 Dec 2019 11:46:19 +0100 +Subject: [PATCH 05/17] init/Kconfig: enable -O3 for all arches + +Building a kernel with -O3 may help in hunting bugs like [1] and thus +using this switch should not be restricted to one specific arch only. + +With that, lets expose it for everyone. 
+ +[1] https://lore.kernel.org/lkml/673b885183fb64f1cbb3ed2387524077@natalenko.name/ + +Signed-off-by: Oleksandr Natalenko +--- + init/Kconfig | 1 - + 1 file changed, 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig -index b4daad2bac23..c1e59dc04209 100644 +index 0498af567f70..3ae8678e1145 100644 --- a/init/Kconfig +++ b/init/Kconfig -@@ -1244,7 +1244,6 @@ config CC_OPTIMIZE_FOR_PERFORMANCE +@@ -1278,7 +1278,6 @@ config CC_OPTIMIZE_FOR_PERFORMANCE config CC_OPTIMIZE_FOR_PERFORMANCE_O3 bool "Optimize more for performance (-O3)" @@ -88,39 +150,52 @@ index b4daad2bac23..c1e59dc04209 100644 help Choosing this option will pass "-O3" to your compiler to optimize the kernel yet more for performance. +-- +2.28.0 + + +From 360c6833e07cc9fdef5746f6bc45bdbc7212288d Mon Sep 17 00:00:00 2001 +From: "Jan Alexander Steffens (heftig)" +Date: Fri, 26 Oct 2018 11:22:33 +0100 +Subject: [PATCH 06/17] infiniband: Fix __read_overflow2 error with -O3 + inlining + +--- + drivers/infiniband/core/addr.c | 1 + + 1 file changed, 1 insertion(+) + diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c -index 4f32c4062fb6..c0bf039e1b40 100644 +index 3a98439bba83..6efc4f907f58 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c -@@ -721,6 +721,7 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, - struct sockaddr _sockaddr; +@@ -820,6 +820,7 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, + union { struct sockaddr_in _sockaddr_in; struct sockaddr_in6 _sockaddr_in6; + struct sockaddr_ib _sockaddr_ib; } sgid_addr, dgid_addr; int ret; -diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h -index 79226ca8f80f..2a30060e7e1d 100644 ---- a/include/linux/blkdev.h -+++ b/include/linux/blkdev.h -@@ -47,7 +47,11 @@ struct blk_queue_stats; - struct blk_stat_callback; - - #define BLKDEV_MIN_RQ 4 -+#ifdef CONFIG_ZENIFY -+#define BLKDEV_MAX_RQ 512 -+#else - #define BLKDEV_MAX_RQ 128 /* Default maximum */ -+#endif - - /* Must be consistent with blk_mq_poll_stats_bkt() */ - #define BLK_MQ_POLL_STATS_BKTS 16 +-- +2.28.0 + + +From f85ed068b4d0e6c31edce8574a95757a60e58b87 Mon Sep 17 00:00:00 2001 +From: Etienne Juvigny +Date: Mon, 3 Sep 2018 17:36:25 +0200 +Subject: [PATCH 07/17] Zenify & stuff + +--- + init/Kconfig | 32 ++++++++++++++++++++++++++++++++ + kernel/sched/fair.c | 25 +++++++++++++++++++++++++ + mm/page-writeback.c | 8 ++++++++ + 3 files changed, 65 insertions(+) + diff --git a/init/Kconfig b/init/Kconfig -index 041f3a022122..5ed70eb1ad3a 100644 +index 3ae8678e1145..da708eed0f1e 100644 --- a/init/Kconfig +++ b/init/Kconfig -@@ -45,6 +45,38 @@ config THREAD_INFO_IN_TASK +@@ -92,6 +92,38 @@ config THREAD_INFO_IN_TASK menu "General setup" @@ -160,7 +235,7 @@ index 041f3a022122..5ed70eb1ad3a 100644 bool diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 2f0a0be4d344..bada807c7e59 100644 +index 6b3b59cc51d6..2a0072192c3d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -37,8 +37,13 @@ @@ -216,9 +291,9 @@ index 2f0a0be4d344..bada807c7e59 100644 const_debug unsigned int sysctl_sched_migration_cost = 500000UL; +#endif - #ifdef CONFIG_SMP - /* -@@ -107,8 +128,12 @@ int __weak arch_asym_cpu_priority(int cpu) + int sched_thermal_decay_shift; + static int __init setup_sched_thermal_decay_shift(char *str) +@@ -128,8 +149,12 @@ int __weak arch_asym_cpu_priority(int cpu) * * (default: 5 msec, units: microseconds) */ @@ -229,10 +304,10 @@ index 2f0a0be4d344..bada807c7e59 100644 #endif +#endif - /* - * The margin used when comparing 
utilization with CPU capacity: + static inline void update_load_add(struct load_weight *lw, unsigned long inc) + { diff --git a/mm/page-writeback.c b/mm/page-writeback.c -index 337c6afb3345..9315e358f292 100644 +index 28b3e7a67565..01a1aef2b9b1 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -71,7 +71,11 @@ static long ratelimit_pages = 32; @@ -259,11 +334,29 @@ index 337c6afb3345..9315e358f292 100644 /* * vm_dirty_bytes starts at 0 (disabled) so that it is a function of +-- +2.28.0 + + +From e92e67143385cf285851e12aa8b7f083dd38dd24 Mon Sep 17 00:00:00 2001 +From: Steven Barrett +Date: Sun, 16 Jan 2011 18:57:32 -0600 +Subject: [PATCH 08/17] ZEN: Allow TCP YeAH as default congestion control + +4.4: In my tests YeAH dramatically slowed down transfers over a WLAN, + reducing throughput from ~65Mbps (CUBIC) to ~7MBps (YeAH) over 10 + seconds (netperf TCP_STREAM) including long stalls. + + Be careful when choosing this. ~heftig +--- + net/ipv4/Kconfig | 4 ++++ + 1 file changed, 4 insertions(+) + diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig -index 80dad301361d..42b7fa7d01f8 100644 +index e64e59b536d3..bfb55ef7ebbe 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig -@@ -702,6 +702,9 @@ choice +@@ -691,6 +691,9 @@ choice config DEFAULT_VEGAS bool "Vegas" if TCP_CONG_VEGAS=y @@ -273,7 +366,7 @@ index 80dad301361d..42b7fa7d01f8 100644 config DEFAULT_VENO bool "Veno" if TCP_CONG_VENO=y -@@ -735,6 +738,7 @@ config DEFAULT_TCP_CONG +@@ -724,6 +727,7 @@ config DEFAULT_TCP_CONG default "htcp" if DEFAULT_HTCP default "hybla" if DEFAULT_HYBLA default "vegas" if DEFAULT_VEGAS @@ -281,38 +374,15 @@ index 80dad301361d..42b7fa7d01f8 100644 default "westwood" if DEFAULT_WESTWOOD default "veno" if DEFAULT_VENO default "reno" if DEFAULT_RENO +-- +2.28.0 -From: Nick Desaulniers -Date: Mon, 24 Dec 2018 13:37:41 +0200 -Subject: include/linux/compiler*.h: define asm_volatile_goto -asm_volatile_goto should also be defined for other compilers that -support asm goto. - -Fixes commit 815f0dd ("include/linux/compiler*.h: make compiler-*.h -mutually exclusive"). - -Signed-off-by: Nick Desaulniers -Signed-off-by: Miguel Ojeda - -diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h -index ba814f1..e77eeb0 100644 ---- a/include/linux/compiler_types.h -+++ b/include/linux/compiler_types.h -@@ -188,6 +188,10 @@ struct ftrace_likely_data { - #define asm_volatile_goto(x...) asm goto(x) - #endif - -+#ifndef asm_volatile_goto -+#define asm_volatile_goto(x...) asm goto(x) -+#endif -+ - /* Are two types/vars the same type (ignoring qualifiers)? */ - #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) - -From: Andy Lavr -Date: Mon, 24 Dec 2018 14:57:47 +0200 -Subject: avl: Use [defer+madvise] as default khugepaged defrag strategy +From 76dbe7477bfde1b5e8bf29a71b5af7ab2be9b98e Mon Sep 17 00:00:00 2001 +From: Steven Barrett +Date: Wed, 28 Nov 2018 19:01:27 -0600 +Subject: [PATCH 09/17] zen: Use [defer+madvise] as default khugepaged defrag + strategy For some reason, the default strategy to respond to THP fault fallbacks is still just madvise, meaning stall if the program wants transparent @@ -333,20 +403,20 @@ so defer+madvise _does_ make a ton of sense. Make it the default for interactive systems, especially if the kernel maintainer left transparent hugepages on "always". 
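As a quick illustration (not part of this patch), the resulting default is visible at runtime through the standard transparent_hugepage sysfs interface; a minimal userspace sketch, assuming the usual /sys/kernel/mm/transparent_hugepage/defrag layout, where the bracketed entry marks the active policy:

/* Illustrative only: print the current THP defrag policy line, e.g.
 * "always defer [defer+madvise] madvise never" on a patched kernel. */
#include <stdio.h>

int main(void)
{
	char buf[256];
	FILE *f = fopen("/sys/kernel/mm/transparent_hugepage/defrag", "r");

	if (!f)
		return 1;
	if (fgets(buf, sizeof(buf), f))
		printf("THP defrag policy: %s", buf);
	fclose(f);
	return 0;
}
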
-Reasoning and details in the original patch: -https://lwn.net/Articles/711248/ - -Signed-off-by: Andy Lavr +Reasoning and details in the original patch: https://lwn.net/Articles/711248/ +--- + mm/huge_memory.c | 4 ++++ + 1 file changed, 4 insertions(+) diff --git a/mm/huge_memory.c b/mm/huge_memory.c -index e84a10b..21d62b7 100644 +index 74300e337c3c..9277f22c10a7 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -53,7 +53,11 @@ unsigned long transparent_hugepage_flags __read_mostly = #ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE (1< +Date: Wed, 24 Oct 2018 16:58:52 -0300 +Subject: [PATCH 10/17] net/sched: allow configuring cake qdisc as default + +Signed-off-by: Alexandre Frade +--- + net/sched/Kconfig | 4 ++++ + 1 file changed, 4 insertions(+) + diff --git a/net/sched/Kconfig b/net/sched/Kconfig +index 84badf00647e..6a922bca9f39 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig -@@ -429,6 +429,9 @@ - Select the queueing discipline that will be used by default - for all network devices. +@@ -471,6 +471,9 @@ choice + config DEFAULT_SFQ + bool "Stochastic Fair Queue" if NET_SCH_SFQ + config DEFAULT_CAKE -+ bool "Common Applications Kept Enhanced" if NET_SCH_CAKE ++ bool "Common Applications Kept Enhanced" if NET_SCH_CAKE + - config DEFAULT_FQ - bool "Fair Queue" if NET_SCH_FQ - -@@ -448,6 +451,7 @@ - config DEFAULT_NET_SCH - string - default "pfifo_fast" if DEFAULT_PFIFO_FAST + config DEFAULT_PFIFO_FAST + bool "Priority FIFO Fast" + endchoice +@@ -481,6 +484,7 @@ config DEFAULT_NET_SCH + default "fq" if DEFAULT_FQ + default "fq_codel" if DEFAULT_FQ_CODEL + default "sfq" if DEFAULT_SFQ + default "cake" if DEFAULT_CAKE - default "fq" if DEFAULT_FQ - default "fq_codel" if DEFAULT_FQ_CODEL - default "sfq" if DEFAULT_SFQ + default "pfifo_fast" + endif + +-- +2.28.0 + + +From 816ee502759e954304693813bd03d94986b28dba Mon Sep 17 00:00:00 2001 +From: Tk-Glitch +Date: Mon, 18 Feb 2019 17:40:57 +0100 +Subject: [PATCH 11/17] mm: Set watermark_scale_factor to 200 (from 10) + +Multiple users have reported it's helping reducing/eliminating stuttering +with DXVK. +--- + mm/page_alloc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c -index a29043ea9..3fb219747 100644 +index 898ff44f2c7b..e72074034793 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c -@@ -263,7 +263,7 @@ compound_page_dtor * const compound_page_dtors[] = { +@@ -330,7 +330,7 @@ int watermark_boost_factor __read_mostly; #else int watermark_boost_factor __read_mostly = 15000; #endif @@ -389,25 +488,74 @@ index a29043ea9..3fb219747 100644 static unsigned long nr_kernel_pages __initdata; static unsigned long nr_all_pages __initdata; - +-- +2.28.0 + + +From 90240bcd90a568878738e66c0d45bed3e38e347b Mon Sep 17 00:00:00 2001 +From: Tk-Glitch +Date: Fri, 19 Apr 2019 12:33:38 +0200 +Subject: [PATCH 12/17] Set vm.max_map_count to 262144 by default + +The value is still pretty low, and AMD64-ABI and ELF extended numbering +supports that, so we should be fine on modern x86 systems. + +This fixes crashes in some applications using more than 65535 vmas (also +affects some windows games running in wine, such as Star Citizen). 
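As a quick illustration (not part of this patch), the raised ceiling is visible to userspace through the usual procfs sysctl path; a minimal sketch, assuming /proc/sys/vm/max_map_count is readable, which should report 262144 on a kernel carrying this change unless overridden by sysctl:

/* Illustrative only: query the running kernel's vm.max_map_count limit. */
#include <stdio.h>

int main(void)
{
	long val;
	FILE *f = fopen("/proc/sys/vm/max_map_count", "r");

	if (!f)
		return 1;
	if (fscanf(f, "%ld", &val) == 1)
		printf("vm.max_map_count = %ld\n", val);
	fclose(f);
	return 0;
}
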
+--- + include/linux/mm.h | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + diff --git a/include/linux/mm.h b/include/linux/mm.h -index 80bb6408f..6c8b55cd1 100644 +index bc05c3588aa3..b0cefe94920d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h -@@ -146,8 +146,7 @@ extern int mmap_rnd_compat_bits __read_mostly; +@@ -190,8 +190,7 @@ static inline void __mm_zero_struct_page(struct page *page) * not a hard limit any more. Although some userspace tools can be surprised by * that. */ -#define MAPCOUNT_ELF_CORE_MARGIN (5) -#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) ++#define DEFAULT_MAX_MAP_COUNT (262144) + + extern int sysctl_max_map_count; + +-- +2.28.0 + + +From 3a34034dba5efe91bcec491efe8c66e8087f509b Mon Sep 17 00:00:00 2001 +From: Tk-Glitch +Date: Mon, 27 Jul 2020 00:19:18 +0200 +Subject: [PATCH 13/17] mm: bump DEFAULT_MAX_MAP_COUNT + +Some games such as Detroit: Become Human tend to be very crash prone with +lower values. +--- + include/linux/mm.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/linux/mm.h b/include/linux/mm.h +index b0cefe94920d..890165099b07 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -190,7 +190,7 @@ static inline void __mm_zero_struct_page(struct page *page) + * not a hard limit any more. Although some userspace tools can be surprised by + * that. + */ +-#define DEFAULT_MAX_MAP_COUNT (262144) +#define DEFAULT_MAX_MAP_COUNT (524288) extern int sysctl_max_map_count; -From adb1f9df27f08e6488bcd80b1607987c6114a77a Mon Sep 17 00:00:00 2001 +-- +2.28.0 + + +From 977812938da7c7226415778c340832141d9278b7 Mon Sep 17 00:00:00 2001 From: Alexandre Frade Date: Mon, 25 Nov 2019 15:13:06 -0300 -Subject: [PATCH] elevator: set default scheduler to bfq for blk-mq +Subject: [PATCH 14/17] elevator: set default scheduler to bfq for blk-mq Signed-off-by: Alexandre Frade --- @@ -415,7 +563,7 @@ Signed-off-by: Alexandre Frade 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/block/elevator.c b/block/elevator.c -index 076ba7308e65..81f89095aa77 100644 +index 4eab3d70e880..79669aa39d79 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -623,15 +623,15 @@ static inline bool elv_support_iosched(struct request_queue *q) @@ -437,10 +585,14 @@ index 076ba7308e65..81f89095aa77 100644 } /* -From c3ec05777c46e19a8a26d0fc4ca0c0db8a19de97 Mon Sep 17 00:00:00 2001 +-- +2.28.0 + + +From e2111bc5989131c675659d40e0cc4f214df2f990 Mon Sep 17 00:00:00 2001 From: Alexandre Frade Date: Fri, 10 May 2019 16:45:59 -0300 -Subject: [PATCH] block: set rq_affinity = 2 for full multithreading I/O +Subject: [PATCH 15/17] block: set rq_affinity = 2 for full multithreading I/O requests Signed-off-by: Alexandre Frade @@ -449,10 +601,10 @@ Signed-off-by: Alexandre Frade 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h -index f3ea78b0c91c..4dbacc6b073b 100644 +index 28efe374a2e1..d4e5d35d2ece 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h -@@ -621,7 +621,8 @@ struct request_queue { +@@ -624,7 +624,8 @@ struct request_queue { #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ #define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ @@ -462,22 +614,26 @@ index f3ea78b0c91c..4dbacc6b073b 100644 void blk_queue_flag_set(unsigned int flag, struct request_queue *q); void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); -From 8171d33d0b84a953649863538fdbe4c26c035e4f Mon Sep 17 00:00:00 2001 -From: Alexandre Frade 
-Date: Fri, 10 May 2019 14:32:50 -0300 -Subject: [PATCH] mm: set 2 megabytes for address_space-level file read-ahead - pages size +-- +2.28.0 -Signed-off-by: Alexandre Frade + +From 3c229f434aca65c4ca61772bc03c3e0370817b92 Mon Sep 17 00:00:00 2001 +From: Alexandre Frade +Date: Mon, 3 Aug 2020 17:05:04 +0000 +Subject: [PATCH 16/17] mm: set 2 megabytes for address_space-level file + read-ahead pages size + +Signed-off-by: Alexandre Frade --- include/linux/pagemap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h -index a2adf95b3f9c..e804d9f7583a 100644 +index cf2468da68e9..007dea784451 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h -@@ -654,7 +654,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask); +@@ -655,7 +655,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask); void delete_from_page_cache_batch(struct address_space *mapping, struct pagevec *pvec); @@ -486,10 +642,14 @@ index a2adf95b3f9c..e804d9f7583a 100644 void page_cache_sync_readahead(struct address_space *, struct file_ra_state *, struct file *, pgoff_t index, unsigned long req_count); -From de7119e3db9fdb4c704355854a02a7e9fad931d4 Mon Sep 17 00:00:00 2001 +-- +2.28.0 + + +From 716f41cf6631f3a85834dcb67b4ce99185b6387f Mon Sep 17 00:00:00 2001 From: Steven Barrett Date: Wed, 15 Jan 2020 20:43:56 -0600 -Subject: [PATCH] ZEN: intel-pstate: Implement "enable" parameter +Subject: [PATCH 17/17] ZEN: intel-pstate: Implement "enable" parameter If intel-pstate is compiled into the kernel, it will preempt the loading of acpi-cpufreq so you can take advantage of hardware p-states without @@ -517,10 +677,10 @@ selection. 2 files changed, 5 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index ade4e6ec23e03..0b613370d28d8 100644 +index fb95fad81c79..3e92fee81e33 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -1765,6 +1765,9 @@ +@@ -1857,6 +1857,9 @@ disable Do not enable intel_pstate as the default scaling driver for the supported processors @@ -531,10 +691,10 @@ index ade4e6ec23e03..0b613370d28d8 100644 Use intel_pstate as a scaling driver, but configure it to work with generic cpufreq governors (instead of diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c -index d2fa3e9ccd97c..bd10cb02fc0ff 100644 +index 36a469150ff9..aee891c9b78a 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c -@@ -2826,6 +2826,8 @@ static int __init intel_pstate_setup(char *str) +@@ -2845,6 +2845,8 @@ static int __init intel_pstate_setup(char *str) pr_info("HWP disabled\n"); no_hwp = 1; } @@ -543,3 +703,6 @@ index d2fa3e9ccd97c..bd10cb02fc0ff 100644 if (!strcmp(str, "force")) force_load = 1; if (!strcmp(str, "hwp_only")) +-- +2.28.0 + diff --git a/linux59-rc-tkg/linux59-tkg-patches/0003-glitched-cfs.patch b/linux59-tkg/linux59-tkg-patches/0003-glitched-cfs.patch similarity index 100% rename from linux59-rc-tkg/linux59-tkg-patches/0003-glitched-cfs.patch rename to linux59-tkg/linux59-tkg-patches/0003-glitched-cfs.patch diff --git a/linux59-rc-tkg/linux59-tkg-patches/0005-glitched-pds.patch b/linux59-tkg/linux59-tkg-patches/0005-glitched-pds.patch similarity index 50% rename from linux59-rc-tkg/linux59-tkg-patches/0005-glitched-pds.patch rename to linux59-tkg/linux59-tkg-patches/0005-glitched-pds.patch index 
63a2468..08c9ef3 100644 --- a/linux59-rc-tkg/linux59-tkg-patches/0005-glitched-pds.patch +++ b/linux59-tkg/linux59-tkg-patches/0005-glitched-pds.patch @@ -88,79 +88,3 @@ index 9270a4370d54..30d01e647417 100644 static void set_task_reclaim_state(struct task_struct *task, struct reclaim_state *rs) - -diff --git a/init/Kconfig b/init/Kconfig -index 11fd9b502d06..e9bc34d3019b 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -715,6 +715,7 @@ menu "Scheduler features" - config UCLAMP_TASK - bool "Enable utilization clamping for RT/FAIR tasks" - depends on CPU_FREQ_GOV_SCHEDUTIL -+ depends on !SCHED_PDS - help - This feature enables the scheduler to track the clamped utilization - of each CPU based on RUNNABLE tasks scheduled on that CPU. -@@ -948,7 +948,6 @@ config CGROUP_DEVICE - - config CGROUP_CPUACCT - bool "Simple CPU accounting controller" -- depends on !SCHED_PDS - help - Provides a simple controller for monitoring the - total CPU consumed by the tasks in a cgroup. -diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile -index b23231bae996..cab4e5c5b38e 100644 ---- a/kernel/sched/Makefile -+++ b/kernel/sched/Makefile -@@ -24,13 +24,13 @@ obj-y += fair.o rt.o deadline.o - obj-$(CONFIG_SMP) += cpudeadline.o topology.o stop_task.o - obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o - obj-$(CONFIG_SCHED_DEBUG) += debug.o --obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o - endif - obj-y += loadavg.o clock.o cputime.o - obj-y += idle.o - obj-y += wait.o wait_bit.o swait.o completion.o - obj-$(CONFIG_SMP) += cpupri.o pelt.o - obj-$(CONFIG_SCHEDSTATS) += stats.o -+obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o - obj-$(CONFIG_CPU_FREQ) += cpufreq.o - obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o - obj-$(CONFIG_MEMBARRIER) += membarrier.o - -diff --git a/kernel/sched/pds.c b/kernel/sched/pds.c -index 9281ad164..f09a609cf 100644 ---- a/kernel/sched/pds.c -+++ b/kernel/sched/pds.c -@@ -81,6 +81,18 @@ enum { - NR_CPU_AFFINITY_CHK_LEVEL - }; - -+/* -+ * This allows printing both to /proc/sched_debug and -+ * to the console -+ */ -+#define SEQ_printf(m, x...) 
\ -+ do { \ -+ if (m) \ -+ seq_printf(m, x); \ -+ else \ -+ pr_cont(x); \ -+ } while (0) -+ - static inline void print_scheduler_version(void) - { - printk(KERN_INFO "pds: PDS-mq CPU Scheduler 0.99o by Alfred Chen.\n"); -@@ -6353,7 +6365,10 @@ void ia64_set_curr_task(int cpu, struct task_struct *p) - #ifdef CONFIG_SCHED_DEBUG - void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, - struct seq_file *m) --{} -+{ -+ SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns), -+ get_nr_threads(p)); -+} - - void proc_sched_set_task(struct task_struct *p) - {} diff --git a/linux59-rc-tkg/linux59-tkg-patches/0006-add-acs-overrides_iommu.patch b/linux59-tkg/linux59-tkg-patches/0006-add-acs-overrides_iommu.patch similarity index 100% rename from linux59-rc-tkg/linux59-tkg-patches/0006-add-acs-overrides_iommu.patch rename to linux59-tkg/linux59-tkg-patches/0006-add-acs-overrides_iommu.patch diff --git a/linux59-tkg/linux59-tkg-patches/0009-glitched-bmq.patch b/linux59-tkg/linux59-tkg-patches/0009-glitched-bmq.patch new file mode 100644 index 0000000..e42e522 --- /dev/null +++ b/linux59-tkg/linux59-tkg-patches/0009-glitched-bmq.patch @@ -0,0 +1,90 @@ +From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 +From: Tk-Glitch +Date: Wed, 4 Jul 2018 04:30:08 +0200 +Subject: glitched - BMQ + +diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz +index 2a202a846757..1d9c7ed79b11 100644 +--- a/kernel/Kconfig.hz ++++ b/kernel/Kconfig.hz +@@ -4,7 +4,7 @@ + + choice + prompt "Timer frequency" +- default HZ_250 ++ default HZ_500 + help + Allows the configuration of the timer frequency. It is customary + to have the timer interrupt run at 1000 Hz but 100 Hz may be more +@@ -39,6 +39,13 @@ choice + on SMP and NUMA systems and exactly dividing by both PAL and + NTSC frame rates for video and multimedia work. + ++ config HZ_500 ++ bool "500 HZ" ++ help ++ 500 Hz is a balanced timer frequency. Provides fast interactivity ++ on desktops with great smoothness without increasing CPU power ++ consumption and sacrificing the battery life on laptops. ++ + config HZ_1000 + bool "1000 HZ" + help +@@ -52,6 +59,7 @@ config HZ + default 100 if HZ_100 + default 250 if HZ_250 + default 300 if HZ_300 ++ default 500 if HZ_500 + default 1000 if HZ_1000 + + config SCHED_HRTICK + +diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz +index 2a202a846757..1d9c7ed79b11 100644 +--- a/kernel/Kconfig.hz ++++ b/kernel/Kconfig.hz +@@ -4,7 +4,7 @@ + + choice + prompt "Timer frequency" +- default HZ_500 ++ default HZ_750 + help + Allows the configuration of the timer frequency. It is customary + to have the timer interrupt run at 1000 Hz but 100 Hz may be more +@@ -46,6 +46,13 @@ choice + on desktops with great smoothness without increasing CPU power + consumption and sacrificing the battery life on laptops. + ++ config HZ_750 ++ bool "750 HZ" ++ help ++ 750 Hz is a good timer frequency for desktops. Provides fast ++ interactivity with great smoothness without sacrificing too ++ much throughput. ++ + config HZ_1000 + bool "1000 HZ" + help +@@ -60,6 +67,7 @@ config HZ + default 250 if HZ_250 + default 300 if HZ_300 + default 500 if HZ_500 ++ default 750 if HZ_750 + default 1000 if HZ_1000 + + config SCHED_HRTICK + +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 9270a4370d54..30d01e647417 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -169,7 +169,7 @@ + /* + * From 0 .. 200. Higher means more swappy. 
+ */ +-int vm_swappiness = 60; ++int vm_swappiness = 20; + + static void set_task_reclaim_state(struct task_struct *task, + struct reclaim_state *rs) diff --git a/linux59-rc-tkg/linux59-tkg-patches/0005-glitched-ondemand-pds.patch b/linux59-tkg/linux59-tkg-patches/0009-glitched-ondemand-bmq.patch similarity index 88% rename from linux59-rc-tkg/linux59-tkg-patches/0005-glitched-ondemand-pds.patch rename to linux59-tkg/linux59-tkg-patches/0009-glitched-ondemand-bmq.patch index c1929e8..a926040 100644 --- a/linux59-rc-tkg/linux59-tkg-patches/0005-glitched-ondemand-pds.patch +++ b/linux59-tkg/linux59-tkg-patches/0009-glitched-ondemand-bmq.patch @@ -6,7 +6,7 @@ index 6b423eebfd5d..61e3271675d6 100644 #include "cpufreq_ondemand.h" /* On-demand governor macros */ --#define DEF_FREQUENCY_UP_THRESHOLD (63) +-#define DEF_FREQUENCY_UP_THRESHOLD (80) -#define DEF_SAMPLING_DOWN_FACTOR (1) +#define DEF_FREQUENCY_UP_THRESHOLD (55) +#define DEF_SAMPLING_DOWN_FACTOR (5) @@ -15,4 +15,4 @@ index 6b423eebfd5d..61e3271675d6 100644 +#define MICRO_FREQUENCY_UP_THRESHOLD (63) #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) #define MIN_FREQUENCY_UP_THRESHOLD (1) - #define MAX_FREQUENCY_UP_THRESHOLD (100) + #define MAX_FREQUENCY_UP_THRESHOLD (100) diff --git a/linux59-rc-tkg/linux59-tkg-patches/0005-v5.9_undead-pds099o.patch b/linux59-tkg/linux59-tkg-patches/0009-prjc_v5.9-r0.patch similarity index 73% rename from linux59-rc-tkg/linux59-tkg-patches/0005-v5.9_undead-pds099o.patch rename to linux59-tkg/linux59-tkg-patches/0009-prjc_v5.9-r0.patch index 97469ba..cd0d562 100644 --- a/linux59-rc-tkg/linux59-tkg-patches/0005-v5.9_undead-pds099o.patch +++ b/linux59-tkg/linux59-tkg-patches/0009-prjc_v5.9-r0.patch @@ -1,153 +1,156 @@ -From 68f1a9541ef3185b1021e8e54d2712c7039418d7 Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Mon, 15 Jun 2020 23:58:41 +0200 -Subject: PDS 099o, initial 5.8 rebase - - -diff --git a/Documentation/scheduler/sched-PDS-mq.txt b/Documentation/scheduler/sched-PDS-mq.txt +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index a1068742a6df..b97a9697fde4 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -4611,6 +4611,12 @@ + + sbni= [NET] Granch SBNI12 leased line adapter + ++ sched_timeslice= ++ [KNL] Time slice in us for BMQ/PDS scheduler. ++ Format: (must be >= 1000) ++ Default: 4000 ++ See Documentation/scheduler/sched-BMQ.txt ++ + sched_debug [KNL] Enables verbose scheduler debug messages. + + schedstats= [KNL,X86] Enable or disable scheduled statistics. +diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst +index d4b32cc32bb7..14118e5168ef 100644 +--- a/Documentation/admin-guide/sysctl/kernel.rst ++++ b/Documentation/admin-guide/sysctl/kernel.rst +@@ -1515,3 +1515,13 @@ is 10 seconds. + + The softlockup threshold is (``2 * watchdog_thresh``). Setting this + tunable to zero will disable lockup detection altogether. ++ ++yield_type: ++=========== ++ ++BMQ/PDS CPU scheduler only. This determines what type of yield calls ++to sched_yield will perform. ++ ++ 0 - No yield. ++ 1 - Deboost and requeue task. (default) ++ 2 - Set run queue skip task. 
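As a quick illustration (not part of this patch), the yield_type knob documented above is a plain kernel sysctl; a minimal userspace sketch, assuming it surfaces at /proc/sys/kernel/yield_type as is usual for entries described in sysctl/kernel.rst:

/* Illustrative only: select the BMQ/PDS yield behaviour at runtime.
 * Requires root; the file only exists on a BMQ/PDS kernel. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/yield_type", "w");

	if (!f)
		return 1;        /* not a BMQ/PDS kernel, or no permission */
	fputs("1\n", f);         /* 1 - deboost and requeue task (default) */
	fclose(f);
	return 0;
}
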
+diff --git a/Documentation/scheduler/sched-BMQ.txt b/Documentation/scheduler/sched-BMQ.txt new file mode 100644 -index 000000000000..709e86f6487e +index 000000000000..05c84eec0f31 --- /dev/null -+++ b/Documentation/scheduler/sched-PDS-mq.txt -@@ -0,0 +1,56 @@ -+ Priority and Deadline based Skiplist multiple queue Scheduler -+ ------------------------------------------------------------- ++++ b/Documentation/scheduler/sched-BMQ.txt +@@ -0,0 +1,110 @@ ++ BitMap queue CPU Scheduler ++ -------------------------- + +CONTENT +======== + -+ 0. Development -+ 1. Overview -+ 1.1 Design goal -+ 1.2 Design summary -+ 2. Design Detail -+ 2.1 Skip list implementation -+ 2.2 Task preempt -+ 2.3 Task policy, priority and deadline -+ 2.4 Task selection -+ 2.5 Run queue balance -+ 2.6 Task migration ++ Background ++ Design ++ Overview ++ Task policy ++ Priority management ++ BitMap Queue ++ CPU Assignment and Migration + + -+0. Development -+============== ++Background ++========== + -+Priority and Deadline based Skiplist multiple queue scheduler, referred to as -+PDS from here on, is developed upon the enhancement patchset VRQ(Variable Run -+Queue) for BFS(Brain Fuck Scheduler by Con Kolivas). PDS inherits the existing -+design from VRQ and inspired by the introduction of skiplist data structure -+to the scheduler by Con Kolivas. However, PDS is different from MuQSS(Multiple -+Queue Skiplist Scheduler, the successor after BFS) in many ways. ++BitMap Queue CPU scheduler, referred to as BMQ from here on, is an evolution ++of previous Priority and Deadline based Skiplist multiple queue scheduler(PDS), ++and inspired by Zircon scheduler. The goal of it is to keep the scheduler code ++simple, while efficiency and scalable for interactive tasks, such as desktop, ++movie playback and gaming etc. + -+1. Overview -+=========== ++Design ++====== + -+1.1 Design goal -+--------------- ++Overview ++-------- + -+PDS is designed to make the cpu process scheduler code to be simple, but while -+efficiency and scalable. Be Simple, the scheduler code will be easy to be read -+and the behavious of scheduler will be easy to predict. Be efficiency, the -+scheduler shall be well balance the thoughput performance and task interactivity -+at the same time for different properties the tasks behave. Be scalable, the -+performance of the scheduler should be in good shape with the glowing of -+workload or with the growing of the cpu numbers. ++BMQ use per CPU run queue design, each CPU(logical) has it's own run queue, ++each CPU is responsible for scheduling the tasks that are putting into it's ++run queue. + -+1.2 Design summary -+------------------ ++The run queue is a set of priority queues. Note that these queues are fifo ++queue for non-rt tasks or priority queue for rt tasks in data structure. See ++BitMap Queue below for details. BMQ is optimized for non-rt tasks in the fact ++that most applications are non-rt tasks. No matter the queue is fifo or ++priority, In each queue is an ordered list of runnable tasks awaiting execution ++and the data structures are the same. When it is time for a new task to run, ++the scheduler simply looks the lowest numbered queueue that contains a task, ++and runs the first task from the head of that queue. And per CPU idle task is ++also in the run queue, so the scheduler can always find a task to run on from ++its run queue. + -+PDS is described as a multiple run queues cpu scheduler. Each cpu has its own -+run queue. 
A heavry customized skiplist is used as the backend data structure -+of the cpu run queue. Tasks in run queue is sorted by priority then virtual -+deadline(simplfy to just deadline from here on). In PDS, balance action among -+run queues are kept as less as possible to reduce the migration cost. Cpumask -+data structure is widely used in cpu affinity checking and cpu preemption/ -+selection to make PDS scalable with increasing cpu number. ++Each task will assigned the same timeslice(default 4ms) when it is picked to ++start running. Task will be reinserted at the end of the appropriate priority ++queue when it uses its whole timeslice. When the scheduler selects a new task ++from the priority queue it sets the CPU's preemption timer for the remainder of ++the previous timeslice. When that timer fires the scheduler will stop execution ++on that task, select another task and start over again. + ++If a task blocks waiting for a shared resource then it's taken out of its ++priority queue and is placed in a wait queue for the shared resource. When it ++is unblocked it will be reinserted in the appropriate priority queue of an ++eligible CPU. + -+To be continued... -diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c -index f18d5067cd0f..fe489fc01c73 100644 ---- a/arch/powerpc/platforms/cell/spufs/sched.c -+++ b/arch/powerpc/platforms/cell/spufs/sched.c -@@ -51,11 +51,6 @@ static struct task_struct *spusched_task; - static struct timer_list spusched_timer; - static struct timer_list spuloadavg_timer; - --/* -- * Priority of a normal, non-rt, non-niced'd process (aka nice level 0). -- */ --#define NORMAL_PRIO 120 -- - /* - * Frequency of the spu scheduler tick. By default we do one SPU scheduler - * tick for every 10 CPU scheduler ticks. -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index 2d3f963fd6f1..5f41ead019b1 100644 ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -1006,6 +1006,22 @@ config NR_CPUS - config SCHED_SMT - def_bool y if SMP - -+config SMT_NICE -+ bool "SMT (Hyperthreading) aware nice priority and policy support" -+ depends on SCHED_PDS && SCHED_SMT -+ default y -+ help -+ Enabling Hyperthreading on Intel CPUs decreases the effectiveness -+ of the use of 'nice' levels and different scheduling policies -+ (e.g. realtime) due to sharing of CPU power between hyperthreads. -+ SMT nice support makes each logical CPU aware of what is running on -+ its hyperthread siblings, maintaining appropriate distribution of -+ CPU according to nice levels and scheduling policies at the expense -+ of slightly increased overhead. ++Task policy ++----------- + -+ If unsure say Y here. ++BMQ supports DEADLINE, FIFO, RR, NORMAL, BATCH and IDLE task policy like the ++mainline CFS scheduler. But BMQ is heavy optimized for non-rt task, that's ++NORMAL/BATCH/IDLE policy tasks. Below is the implementation detail of each ++policy. + ++DEADLINE ++ It is squashed as priority 0 FIFO task. 
+ - config SCHED_MC - def_bool y - prompt "Multi-core scheduler support" -diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c -index 737ff3b9c2c0..b5bc5a1b6de7 100644 ---- a/drivers/cpufreq/cpufreq_conservative.c -+++ b/drivers/cpufreq/cpufreq_conservative.c -@@ -28,8 +28,8 @@ struct cs_dbs_tuners { - }; - - /* Conservative governor macros */ --#define DEF_FREQUENCY_UP_THRESHOLD (80) --#define DEF_FREQUENCY_DOWN_THRESHOLD (20) -+#define DEF_FREQUENCY_UP_THRESHOLD (63) -+#define DEF_FREQUENCY_DOWN_THRESHOLD (26) - #define DEF_FREQUENCY_STEP (5) - #define DEF_SAMPLING_DOWN_FACTOR (1) - #define MAX_SAMPLING_DOWN_FACTOR (10) -diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c -index 82a4d37ddecb..1130e0f5db72 100644 ---- a/drivers/cpufreq/cpufreq_ondemand.c -+++ b/drivers/cpufreq/cpufreq_ondemand.c -@@ -18,7 +18,7 @@ - #include "cpufreq_ondemand.h" - - /* On-demand governor macros */ --#define DEF_FREQUENCY_UP_THRESHOLD (80) -+#define DEF_FREQUENCY_UP_THRESHOLD (63) - #define DEF_SAMPLING_DOWN_FACTOR (1) - #define MAX_SAMPLING_DOWN_FACTOR (100000) - #define MICRO_FREQUENCY_UP_THRESHOLD (95) -@@ -127,7 +127,7 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) - } - - /* -- * Every sampling_rate, we check, if current idle time is less than 20% -+ * Every sampling_rate, we check, if current idle time is less than 37% - * (default), then we try to increase frequency. Else, we adjust the frequency - * proportional to load. - */ ++FIFO/RR ++ All RT tasks share one single priority queue in BMQ run queue designed. The ++complexity of insert operation is O(n). BMQ is not designed for system runs ++with major rt policy tasks. ++ ++NORMAL/BATCH/IDLE ++ BATCH and IDLE tasks are treated as the same policy. They compete CPU with ++NORMAL policy tasks, but they just don't boost. To control the priority of ++NORMAL/BATCH/IDLE tasks, simply use nice level. ++ ++ISO ++ ISO policy is not supported in BMQ. Please use nice level -20 NORMAL policy ++task instead. ++ ++Priority management ++------------------- ++ ++RT tasks have priority from 0-99. For non-rt tasks, there are three different ++factors used to determine the effective priority of a task. The effective ++priority being what is used to determine which queue it will be in. ++ ++The first factor is simply the task’s static priority. Which is assigned from ++task's nice level, within [-20, 19] in userland's point of view and [0, 39] ++internally. ++ ++The second factor is the priority boost. This is a value bounded between ++[-MAX_PRIORITY_ADJ, MAX_PRIORITY_ADJ] used to offset the base priority, it is ++modified by the following cases: ++ ++*When a thread has used up its entire timeslice, always deboost its boost by ++increasing by one. ++*When a thread gives up cpu control(voluntary or non-voluntary) to reschedule, ++and its switch-in time(time after last switch and run) below the thredhold ++based on its priority boost, will boost its boost by decreasing by one buti is ++capped at 0 (won’t go negative). ++ ++The intent in this system is to ensure that interactive threads are serviced ++quickly. These are usually the threads that interact directly with the user ++and cause user-perceivable latency. These threads usually do little work and ++spend most of their time blocked awaiting another user event. 
So they get the ++priority boost from unblocking while background threads that do most of the ++processing receive the priority penalty for using their entire timeslice. diff --git a/fs/proc/base.c b/fs/proc/base.c -index eb2255e95f62..62b8cedbccb6 100644 +index 617db4e0faa0..f85926764f9a 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -479,7 +479,7 @@ static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, @@ -159,99 +162,82 @@ index eb2255e95f62..62b8cedbccb6 100644 (unsigned long long)task->sched_info.run_delay, task->sched_info.pcount); -diff --git a/include/linux/init_task.h b/include/linux/init_task.h -index 2c620d7ac432..1a7987c40c80 100644 ---- a/include/linux/init_task.h -+++ b/include/linux/init_task.h -@@ -36,7 +36,11 @@ extern struct cred init_cred; - #define INIT_PREV_CPUTIME(x) - #endif - -+#ifdef CONFIG_SCHED_PDS -+#define INIT_TASK_COMM "PDS" -+#else - #define INIT_TASK_COMM "swapper" -+#endif /* !CONFIG_SCHED_PDS */ - - /* Attach to the init_task data structure for proper alignment */ - #ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK -diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h -index fed6ba96c527..f03a5ee419a1 100644 ---- a/include/linux/jiffies.h -+++ b/include/linux/jiffies.h -@@ -169,7 +169,7 @@ static inline u64 get_jiffies_64(void) - * Have the 32 bit jiffies value wrap 5 minutes after boot - * so jiffies wrap bugs show up earlier. - */ --#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ)) -+#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-10*HZ)) - - /* - * Change timeval to jiffies, trying to avoid the -diff --git a/kernel/smp.c b/kernel/smp.c -index 4418f5cb8324..2b51afac5b06 100644 ---- a/kernel/smp.c -+++ b/kernel/smp.c +diff --git a/include/asm-generic/resource.h b/include/asm-generic/resource.h +index 8874f681b056..59eb72bf7d5f 100644 +--- a/include/asm-generic/resource.h ++++ b/include/asm-generic/resource.h +@@ -23,7 +23,7 @@ + [RLIMIT_LOCKS] = { RLIM_INFINITY, RLIM_INFINITY }, \ + [RLIMIT_SIGPENDING] = { 0, 0 }, \ + [RLIMIT_MSGQUEUE] = { MQ_BYTES_MAX, MQ_BYTES_MAX }, \ +- [RLIMIT_NICE] = { 0, 0 }, \ ++ [RLIMIT_NICE] = { 30, 30 }, \ + [RLIMIT_RTPRIO] = { 0, 0 }, \ + [RLIMIT_RTTIME] = { RLIM_INFINITY, RLIM_INFINITY }, \ + } diff --git a/include/linux/sched.h b/include/linux/sched.h -index 4418f5cb8324..2b51afac5b06 100644 +index afe01e232935..8918609cb9f0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -31,6 +31,7 @@ - #include +@@ -34,6 +34,7 @@ #include + #include #include +#include /* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; -@@ -652,9 +653,13 @@ struct task_struct { - unsigned int flags; +@@ -652,12 +653,18 @@ struct task_struct { unsigned int ptrace; --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_PDS) - int on_cpu; -+#endif -+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_PDS) + #ifdef CONFIG_SMP +- int on_cpu; struct __call_single_node wake_entry; +#endif ++#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_ALT) ++ int on_cpu; ++#endif ++ +#ifdef CONFIG_SMP #ifdef CONFIG_THREAD_INFO_IN_TASK /* Current CPU: */ unsigned int cpu; -@@ -663,6 +668,7 @@ struct task_struct { + #endif ++#ifndef CONFIG_SCHED_ALT + unsigned int wakee_flips; unsigned long wakee_flip_decay_ts; struct task_struct *last_wakee; - -+#ifndef CONFIG_SCHED_PDS - /* - * recent_used_cpu is initially set as the last CPU used by a task - * that wakes affine another task. 
Waker/wakee relationships can -@@ -671,6 +677,7 @@ struct task_struct { - * used CPU that may be idle. +@@ -671,6 +678,7 @@ struct task_struct { */ int recent_used_cpu; -+#endif /* CONFIG_SCHED_PDS */ int wake_cpu; ++#endif /* !CONFIG_SCHED_ALT */ #endif int on_rq; -@@ -680,13 +687,27 @@ struct task_struct { + +@@ -679,13 +687,33 @@ struct task_struct { int normal_prio; unsigned int rt_priority; ++#ifdef CONFIG_SCHED_ALT ++ u64 last_ran; ++ s64 time_slice; ++#ifdef CONFIG_SCHED_BMQ ++ int boost_prio; ++ int bmq_idx; ++ struct list_head bmq_node; ++#endif /* CONFIG_SCHED_BMQ */ +#ifdef CONFIG_SCHED_PDS -+ int time_slice; + u64 deadline; ++ u64 priodl; + /* skip list level */ + int sl_level; + /* skip list node */ + struct skiplist_node sl_node; -+ /* 8bits prio and 56bits deadline for quick processing */ -+ u64 priodl; -+ u64 last_ran; ++#endif /* CONFIG_SCHED_PDS */ + /* sched_clock time spent running */ + u64 sched_time; -+#else /* CONFIG_SCHED_PDS */ ++#else /* !CONFIG_SCHED_ALT */ const struct sched_class *sched_class; struct sched_entity se; struct sched_rt_entity rt; @@ -263,57 +249,45 @@ index 4418f5cb8324..2b51afac5b06 100644 - struct sched_dl_entity dl; #ifdef CONFIG_UCLAMP_TASK - /* Clamp values requested for a scheduling entity */ -@@ -1306,6 +1327,29 @@ struct task_struct { + /* +@@ -1332,6 +1360,15 @@ struct task_struct { */ }; -+#ifdef CONFIG_SCHED_PDS -+void cpu_scaling(int cpu); -+void cpu_nonscaling(int cpu); ++#ifdef CONFIG_SCHED_ALT +#define tsk_seruntime(t) ((t)->sched_time) +/* replace the uncertian rt_timeout with 0UL */ +#define tsk_rttimeout(t) (0UL) -+ -+#define task_running_idle(p) ((p)->prio == IDLE_PRIO) +#else /* CFS */ -+extern int runqueue_is_locked(int cpu); -+static inline void cpu_scaling(int cpu) -+{ -+} -+ -+static inline void cpu_nonscaling(int cpu) -+{ -+} +#define tsk_seruntime(t) ((t)->se.sum_exec_runtime) +#define tsk_rttimeout(t) ((t)->rt.timeout) -+ -+#define iso_task(p) (false) -+#endif /* CONFIG_SCHED_PDS */ ++#endif /* !CONFIG_SCHED_ALT */ + static inline struct pid *task_pid(struct task_struct *task) { return task->thread_pid; diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h -index 1aff00b65f3c..a5e5fc2c9170 100644 +index 1aff00b65f3c..179d77c8360e 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h -@@ -1,5 +1,22 @@ +@@ -1,5 +1,24 @@ /* SPDX-License-Identifier: GPL-2.0 */ -+#ifdef CONFIG_SCHED_PDS -+ -+#define __tsk_deadline(p) ((p)->deadline) -+ -+static inline int dl_prio(int prio) -+{ -+ return 1; -+} ++#ifdef CONFIG_SCHED_ALT + +static inline int dl_task(struct task_struct *p) +{ -+ return 1; ++ return 0; +} ++ ++#ifdef CONFIG_SCHED_BMQ ++#define __tsk_deadline(p) (0UL) ++#endif ++ ++#ifdef CONFIG_SCHED_PDS ++#define __tsk_deadline(p) ((p)->priodl) ++#endif ++ +#else + +#define __tsk_deadline(p) ((p)->dl.deadline) @@ -321,118 +295,107 @@ index 1aff00b65f3c..a5e5fc2c9170 100644 /* * SCHED_DEADLINE tasks has negative priorities, reflecting * the fact that any of them has higher prio than RT and -@@ -19,6 +36,7 @@ static inline int dl_task(struct task_struct *p) +@@ -19,6 +38,7 @@ static inline int dl_task(struct task_struct *p) { return dl_prio(p->prio); } -+#endif /* CONFIG_SCHED_PDS */ ++#endif /* CONFIG_SCHED_ALT */ static inline bool dl_time_before(u64 a, u64 b) { diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h -index 7d64feafc408..fba04bb91492 100644 +index 7d64feafc408..42730d27ceb5 100644 --- a/include/linux/sched/prio.h +++ b/include/linux/sched/prio.h -@@ 
-20,7 +20,18 @@ +@@ -20,11 +20,20 @@ */ #define MAX_USER_RT_PRIO 100 + -+#ifdef CONFIG_SCHED_PDS -+#define ISO_PRIO (MAX_USER_RT_PRIO) -+ -+#define MAX_RT_PRIO ((MAX_USER_RT_PRIO) + 1) -+ -+#define NORMAL_PRIO (MAX_RT_PRIO) -+#define IDLE_PRIO ((MAX_RT_PRIO) + 1) -+#define PRIO_LIMIT ((IDLE_PRIO) + 1) -+#else /* !CONFIG_SCHED_PDS */ #define MAX_RT_PRIO MAX_USER_RT_PRIO -+#endif /* CONFIG_SCHED_PDS */ #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) + ++/* +/- priority levels from the base priority */ ++#ifdef CONFIG_SCHED_BMQ ++#define MAX_PRIORITY_ADJ 7 ++#endif ++#ifdef CONFIG_SCHED_PDS ++#define MAX_PRIORITY_ADJ 0 ++#endif ++ + /* + * Convert user-nice values [ -20 ... 0 ... 19 ] + * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h -index e5af028c08b4..a96012e6f15e 100644 +index e5af028c08b4..0a7565d0d3cf 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -24,8 +24,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) if (policy == SCHED_FIFO || policy == SCHED_RR) return true; -+#ifndef CONFIG_SCHED_PDS ++#ifndef CONFIG_SCHED_ALT if (policy == SCHED_DEADLINE) return true; +#endif return false; } -diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h -index 38359071236a..90328ccd527f 100644 ---- a/include/linux/sched/task.h -+++ b/include/linux/sched/task.h -@@ -106,7 +106,7 @@ extern long kernel_wait4(pid_t, int __user *, int, struct rusage *); - extern void free_task(struct task_struct *tsk); - - /* sched_exec is called by processes performing an exec */ --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_PDS) - extern void sched_exec(void); - #else - #define sched_exec() {} diff --git a/include/linux/skip_list.h b/include/linux/skip_list.h new file mode 100644 -index 000000000000..713fedd8034f +index 000000000000..47ca955a451d --- /dev/null +++ b/include/linux/skip_list.h @@ -0,0 +1,177 @@ +/* -+ Copyright (C) 2016 Alfred Chen. -+ -+ Code based on Con Kolivas's skip list implementation for BFS, and -+ which is based on example originally by William Pugh. -+ -+Skip Lists are a probabilistic alternative to balanced trees, as -+described in the June 1990 issue of CACM and were invented by -+William Pugh in 1987. -+ -+A couple of comments about this implementation: -+ -+This file only provides a infrastructure of skip list. -+ -+skiplist_node is embedded into container data structure, to get rid the -+dependency of kmalloc/kfree operation in scheduler code. -+ -+A customized search function should be defined using DEFINE_SKIPLIST_INSERT -+macro and be used for skip list insert operation. -+ -+Random Level is also not defined in this file, instead, it should be customized -+implemented and set to node->level then pass to the customized skiplist_insert -+function. -+ -+Levels start at zero and go up to (NUM_SKIPLIST_LEVEL -1) -+ -+NUM_SKIPLIST_LEVEL in this implementation is 8 instead of origin 16, -+considering that there will be 256 entries to enable the top level when using -+random level p=0.5, and that number is more than enough for a run queue usage -+in a scheduler usage. And it also help to reduce the memory usage of the -+embedded skip list node in task_struct to about 50%. 
-+ -+The insertion routine has been implemented so as to use the -+dirty hack described in the CACM paper: if a random level is -+generated that is more than the current maximum level, the -+current maximum level plus one is used instead. -+ -+BFS Notes: In this implementation of skiplists, there are bidirectional -+next/prev pointers and the insert function returns a pointer to the actual -+node the value is stored. The key here is chosen by the scheduler so as to -+sort tasks according to the priority list requirements and is no longer used -+by the scheduler after insertion. The scheduler lookup, however, occurs in -+O(1) time because it is always the first item in the level 0 linked list. -+Since the task struct stores a copy of the node pointer upon skiplist_insert, -+it can also remove it much faster than the original implementation with the -+aid of prev<->next pointer manipulation and no searching. -+*/ ++ * Copyright (C) 2016 Alfred Chen. ++ * ++ * Code based on Con Kolivas's skip list implementation for BFS, and ++ * which is based on example originally by William Pugh. ++ * ++ * Skip Lists are a probabilistic alternative to balanced trees, as ++ * described in the June 1990 issue of CACM and were invented by ++ * William Pugh in 1987. ++ * ++ * A couple of comments about this implementation: ++ * ++ * This file only provides a infrastructure of skip list. ++ * ++ * skiplist_node is embedded into container data structure, to get rid ++ * the dependency of kmalloc/kfree operation in scheduler code. ++ * ++ * A customized search function should be defined using DEFINE_SKIPLIST_INSERT ++ * macro and be used for skip list insert operation. ++ * ++ * Random Level is also not defined in this file, instead, it should be ++ * customized implemented and set to node->level then pass to the customized ++ * skiplist_insert function. ++ * ++ * Levels start at zero and go up to (NUM_SKIPLIST_LEVEL -1) ++ * ++ * NUM_SKIPLIST_LEVEL in this implementation is 8 instead of origin 16, ++ * considering that there will be 256 entries to enable the top level when using ++ * random level p=0.5, and that number is more than enough for a run queue usage ++ * in a scheduler usage. And it also help to reduce the memory usage of the ++ * embedded skip list node in task_struct to about 50%. ++ * ++ * The insertion routine has been implemented so as to use the ++ * dirty hack described in the CACM paper: if a random level is ++ * generated that is more than the current maximum level, the ++ * current maximum level plus one is used instead. ++ * ++ * BFS Notes: In this implementation of skiplists, there are bidirectional ++ * next/prev pointers and the insert function returns a pointer to the actual ++ * node the value is stored. The key here is chosen by the scheduler so as to ++ * sort tasks according to the priority list requirements and is no longer used ++ * by the scheduler after insertion. The scheduler lookup, however, occurs in ++ * O(1) time because it is always the first item in the level 0 linked list. ++ * Since the task struct stores a copy of the node pointer upon skiplist_insert, ++ * it can also remove it much faster than the original implementation with the ++ * aid of prev<->next pointer manipulation and no searching. 
++ */ +#ifndef _LINUX_SKIP_LIST_H +#define _LINUX_SKIP_LIST_H + @@ -455,7 +418,7 @@ index 000000000000..713fedd8034f + +static inline void INIT_SKIPLIST_NODE(struct skiplist_node *node) +{ -+ /* only level 0 ->next matters in skiplist_empty()*/ ++ /* only level 0 ->next matters in skiplist_empty() */ + WRITE_ONCE(node->next[0], node); +} + @@ -563,223 +526,125 @@ index 000000000000..713fedd8034f + return (node->prev[0] == head); +} +#endif /* _LINUX_SKIP_LIST_H */ -diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h -index 3bac0a8ceab2..d6d384ddb57d 100644 ---- a/include/uapi/linux/sched.h -+++ b/include/uapi/linux/sched.h -@@ -115,7 +115,10 @@ struct clone_args { - #define SCHED_FIFO 1 - #define SCHED_RR 2 - #define SCHED_BATCH 3 --/* SCHED_ISO: reserved but not implemented yet */ -+/* SCHED_ISO: Implemented in BFS/MuQSSPDS only */ -+ -+#define SCHED_ISO 4 -+ - #define SCHED_IDLE 5 - #define SCHED_DEADLINE 6 - diff --git a/init/Kconfig b/init/Kconfig -index 74a5ac65644f..e4fd406b58dd 100644 +index d6a0b31b13dc..2122dba5596f 100644 --- a/init/Kconfig +++ b/init/Kconfig -@@ -61,6 +61,21 @@ config THREAD_INFO_IN_TASK +@@ -770,9 +770,39 @@ config GENERIC_SCHED_CLOCK - menu "General setup" + menu "Scheduler features" ++menuconfig SCHED_ALT ++ bool "Alternative CPU Schedulers" ++ default y ++ help ++ This feature enable alternative CPU scheduler" ++ ++if SCHED_ALT ++ ++choice ++ prompt "Alternative CPU Scheduler" ++ default SCHED_BMQ ++ ++config SCHED_BMQ ++ bool "BMQ CPU scheduler" ++ help ++ The BitMap Queue CPU scheduler for excellent interactivity and ++ responsiveness on the desktop and solid scalability on normal ++ hardware and commodity servers. ++ +config SCHED_PDS -+ bool "PDS-mq cpu scheduler" ++ bool "PDS CPU scheduler" + help + The Priority and Deadline based Skip list multiple queue CPU -+ Scheduler for excellent interactivity and responsiveness on the -+ desktop and solid scalability on normal hardware and commodity -+ servers. ++ Scheduler. + -+ Currently incompatible with the Group CPU scheduler, and RCU TORTURE -+ TEST so these options are disabled. ++endchoice + -+ Say Y here. -+ default y ++endif + -+ - config BROKEN - bool - -@@ -777,6 +792,7 @@ config NUMA_BALANCING + config UCLAMP_TASK + bool "Enable utilization clamping for RT/FAIR tasks" + depends on CPU_FREQ_GOV_SCHEDUTIL ++ depends on !SCHED_ALT + help + This feature enables the scheduler to track the clamped utilization + of each CPU based on RUNNABLE tasks scheduled on that CPU. +@@ -858,6 +888,7 @@ config NUMA_BALANCING depends on ARCH_SUPPORTS_NUMA_BALANCING depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY depends on SMP && NUMA && MIGRATION -+ depends on !SCHED_PDS ++ depends on !SCHED_ALT help This option adds support for automatic NUMA aware memory/task placement. The mechanism is quite primitive and is based on migrating memory when -@@ -878,7 +894,7 @@ menuconfig CGROUP_SCHED +@@ -944,7 +975,7 @@ menuconfig CGROUP_SCHED bandwidth allocation to such task groups. It uses cgroups to group tasks. -if CGROUP_SCHED -+if CGROUP_SCHED && !SCHED_PDS ++if CGROUP_SCHED && !SCHED_ALT config FAIR_GROUP_SCHED bool "Group scheduling for SCHED_OTHER" depends on CGROUP_SCHED -@@ -1007,6 +1023,7 @@ config CGROUP_DEVICE - - config CGROUP_CPUACCT - bool "Simple CPU accounting controller" -+ depends on !SCHED_PDS - help - Provides a simple controller for monitoring the - total CPU consumed by the tasks in a cgroup. 
-@@ -1134,6 +1151,7 @@ config CHECKPOINT_RESTORE +@@ -1200,6 +1231,7 @@ config CHECKPOINT_RESTORE config SCHED_AUTOGROUP bool "Automatic process group scheduling" -+ depends on !SCHED_PDS ++ depends on !SCHED_ALT select CGROUPS select CGROUP_SCHED select FAIR_GROUP_SCHED diff --git a/init/init_task.c b/init/init_task.c -index bd403ed3e418..162d3deddd45 100644 +index f6889fce64af..5a23122f3d2c 100644 --- a/init/init_task.c +++ b/init/init_task.c -@@ -59,6 +59,127 @@ struct task_struct init_task - __init_task_data - #endif - = { +@@ -75,9 +75,15 @@ struct task_struct init_task + .stack = init_stack, + .usage = REFCOUNT_INIT(2), + .flags = PF_KTHREAD, ++#ifdef CONFIG_SCHED_ALT ++ .prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, ++ .static_prio = DEFAULT_PRIO, ++ .normal_prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, ++#else + .prio = MAX_PRIO - 20, + .static_prio = MAX_PRIO - 20, + .normal_prio = MAX_PRIO - 20, ++#endif + .policy = SCHED_NORMAL, + .cpus_ptr = &init_task.cpus_mask, + .cpus_mask = CPU_MASK_ALL, +@@ -87,6 +93,19 @@ struct task_struct init_task + .restart_block = { + .fn = do_no_restart_syscall, + }, ++#ifdef CONFIG_SCHED_ALT ++#ifdef CONFIG_SCHED_BMQ ++ .boost_prio = 0, ++ .bmq_idx = 15, ++ .bmq_node = LIST_HEAD_INIT(init_task.bmq_node), ++#endif +#ifdef CONFIG_SCHED_PDS -+#ifdef CONFIG_THREAD_INFO_IN_TASK -+ .thread_info = INIT_THREAD_INFO(init_task), -+ .stack_refcount = ATOMIC_INIT(1), ++ .deadline = 0, ++ .sl_level = 0, ++ .sl_node = SKIPLIST_NODE_INIT(init_task.sl_node), +#endif -+ .state = 0, -+ .stack = init_stack, -+ .usage = ATOMIC_INIT(2), -+ .flags = PF_KTHREAD, -+ .prio = NORMAL_PRIO, -+ .static_prio = MAX_PRIO - 20, -+ .normal_prio = NORMAL_PRIO, -+ .deadline = 0, /* PDS only */ -+ .policy = SCHED_NORMAL, -+ .cpus_ptr = &init_task.cpus_mask, -+ .cpus_mask = CPU_MASK_ALL, -+ .nr_cpus_allowed= NR_CPUS, -+ .mm = NULL, -+ .active_mm = &init_mm, -+ .restart_block = { -+ .fn = do_no_restart_syscall, -+ }, -+ .sl_level = 0, /* PDS only */ -+ .sl_node = SKIPLIST_NODE_INIT(init_task.sl_node), /* PDS only */ -+ .time_slice = HZ, /* PDS only */ -+ .tasks = LIST_HEAD_INIT(init_task.tasks), -+#ifdef CONFIG_SMP -+ .pushable_tasks = PLIST_NODE_INIT(init_task.pushable_tasks, MAX_PRIO), ++ .time_slice = HZ, ++#else + .se = { + .group_node = LIST_HEAD_INIT(init_task.se.group_node), + }, +@@ -94,6 +113,7 @@ struct task_struct init_task + .run_list = LIST_HEAD_INIT(init_task.rt.run_list), + .time_slice = RR_TIMESLICE, + }, +#endif -+#ifdef CONFIG_CGROUP_SCHED -+ .sched_task_group = &root_task_group, -+#endif -+ .ptraced = LIST_HEAD_INIT(init_task.ptraced), -+ .ptrace_entry = LIST_HEAD_INIT(init_task.ptrace_entry), -+ .real_parent = &init_task, -+ .parent = &init_task, -+ .children = LIST_HEAD_INIT(init_task.children), -+ .sibling = LIST_HEAD_INIT(init_task.sibling), -+ .group_leader = &init_task, -+ RCU_POINTER_INITIALIZER(real_cred, &init_cred), -+ RCU_POINTER_INITIALIZER(cred, &init_cred), -+ .comm = INIT_TASK_COMM, -+ .thread = INIT_THREAD, -+ .fs = &init_fs, -+ .files = &init_files, -+ .signal = &init_signals, -+ .sighand = &init_sighand, -+ .nsproxy = &init_nsproxy, -+ .pending = { -+ .list = LIST_HEAD_INIT(init_task.pending.list), -+ .signal = {{0}} -+ }, -+ .blocked = {{0}}, -+ .alloc_lock = __SPIN_LOCK_UNLOCKED(init_task.alloc_lock), -+ .journal_info = NULL, -+ INIT_CPU_TIMERS(init_task) -+ .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock), -+ .timer_slack_ns = 50000, /* 50 usec default slack */ -+ .thread_pid = &init_struct_pid, -+ .thread_group = LIST_HEAD_INIT(init_task.thread_group), 
-+ .thread_node = LIST_HEAD_INIT(init_signals.thread_head), -+#ifdef CONFIG_AUDITSYSCALL -+ .loginuid = INVALID_UID, -+ .sessionid = AUDIT_SID_UNSET, -+#endif -+#ifdef CONFIG_PERF_EVENTS -+ .perf_event_mutex = __MUTEX_INITIALIZER(init_task.perf_event_mutex), -+ .perf_event_list = LIST_HEAD_INIT(init_task.perf_event_list), -+#endif -+#ifdef CONFIG_PREEMPT_RCU -+ .rcu_read_lock_nesting = 0, -+ .rcu_read_unlock_special.s = 0, -+ .rcu_node_entry = LIST_HEAD_INIT(init_task.rcu_node_entry), -+ .rcu_blocked_node = NULL, -+#endif -+#ifdef CONFIG_TASKS_RCU -+ .rcu_tasks_holdout = false, -+ .rcu_tasks_holdout_list = LIST_HEAD_INIT(init_task.rcu_tasks_holdout_list), -+ .rcu_tasks_idle_cpu = -1, -+#endif -+#ifdef CONFIG_CPUSETS -+ .mems_allowed_seq = SEQCNT_SPINLOCK_ZERO(init_task.mems_allowed_seq, -+ &init_task.alloc_lock), -+#endif -+#ifdef CONFIG_RT_MUTEXES -+ .pi_waiters = RB_ROOT_CACHED, -+ .pi_top_task = NULL, -+#endif -+ INIT_PREV_CPUTIME(init_task) -+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -+ .vtime.seqcount = SEQCNT_ZERO(init_task.vtime_seqcount), -+ .vtime.starttime = 0, -+ .vtime.state = VTIME_SYS, -+#endif -+#ifdef CONFIG_NUMA_BALANCING -+ .numa_preferred_nid = -1, -+ .numa_group = NULL, -+ .numa_faults = NULL, -+#endif -+#ifdef CONFIG_KASAN -+ .kasan_depth = 1, -+#endif -+#ifdef CONFIG_TRACE_IRQFLAGS -+ .softirqs_enabled = 1, -+#endif -+#ifdef CONFIG_LOCKDEP -+ .lockdep_recursion = 0, -+#endif -+#ifdef CONFIG_FUNCTION_GRAPH_TRACER -+ .ret_stack = NULL, -+#endif -+#if defined(CONFIG_TRACING) && defined(CONFIG_PREEMPT) -+ .trace_recursion = 0, -+#endif -+#ifdef CONFIG_LIVEPATCH -+ .patch_state = KLP_UNDEFINED, -+#endif -+#ifdef CONFIG_SECURITY -+ .security = NULL, -+#endif -+#else /* CONFIG_SCHED_PDS */ - #ifdef CONFIG_THREAD_INFO_IN_TASK - .thread_info = INIT_THREAD_INFO(init_task), - .stack_refcount = REFCOUNT_INIT(1), -@@ -182,6 +302,7 @@ struct task_struct init_task - #ifdef CONFIG_SECURITY - .security = NULL, - #endif -+#endif /* CONFIG_SCHED_PDS */ - }; - EXPORT_SYMBOL(init_task); - + .tasks = LIST_HEAD_INIT(init_task.tasks), + #ifdef CONFIG_SMP + .pushable_tasks = PLIST_NODE_INIT(init_task.pushable_tasks, MAX_PRIO), diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c -index 729d3a5c772e..10a7c52b90d5 100644 +index 642415b8c3c9..7e0e1fe18035 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -636,7 +636,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) @@ -787,7 +652,7 @@ index 729d3a5c772e..10a7c52b90d5 100644 } -#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_PDS) ++#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_ALT) /* * Helper routine for generate_sched_domains(). * Do cpusets a, b have overlapping effective cpus_allowed masks? 
@@ -796,7 +661,7 @@ index 729d3a5c772e..10a7c52b90d5 100644 partition_and_rebuild_sched_domains(ndoms, doms, attr); } -#else /* !CONFIG_SMP */ -+#else /* !CONFIG_SMP || CONFIG_SCHED_PDS */ ++#else /* !CONFIG_SMP || CONFIG_SCHED_ALT */ static void rebuild_sched_domains_locked(void) { } @@ -814,10 +679,10 @@ index 27725754ac99..769d773c7182 100644 d->cpu_count += t1; diff --git a/kernel/exit.c b/kernel/exit.c -index ce2a75bc0ade..f0f864bc1ab9 100644 +index 733e80f334e7..3f3506c851fd 100644 --- a/kernel/exit.c +++ b/kernel/exit.c -@@ -122,7 +122,7 @@ static void __exit_signal(struct task_struct *tsk) +@@ -121,7 +121,7 @@ static void __exit_signal(struct task_struct *tsk) sig->curr_target = next_thread(tsk); } @@ -826,7 +691,7 @@ index ce2a75bc0ade..f0f864bc1ab9 100644 sizeof(unsigned long long)); /* -@@ -143,7 +143,7 @@ static void __exit_signal(struct task_struct *tsk) +@@ -142,7 +142,7 @@ static void __exit_signal(struct task_struct *tsk) sig->inblock += task_io_get_inblock(tsk); sig->oublock += task_io_get_oublock(tsk); task_io_accounting_add(&sig->ioac, &tsk->ioac); @@ -836,14 +701,14 @@ index ce2a75bc0ade..f0f864bc1ab9 100644 __unhash_process(tsk, group_dead); write_sequnlock(&sig->stats_lock); diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c -index f6310f848f34..b5de980c7d4e 100644 +index f6310f848f34..4176ad070bc9 100644 --- a/kernel/livepatch/transition.c +++ b/kernel/livepatch/transition.c @@ -306,7 +306,11 @@ static bool klp_try_switch_task(struct task_struct *task) */ rq = task_rq_lock(task, &flags); -+#ifdef CONFIG_SCHED_PDS ++#ifdef CONFIG_SCHED_ALT + if (task_running(task) && task != current) { +#else if (task_running(rq, task) && task != current) { @@ -852,10 +717,10 @@ index f6310f848f34..b5de980c7d4e 100644 "%s: %s:%d is running\n", __func__, task->comm, task->pid); diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c -index c9f090d64f00..063d15a1ab8b 100644 +index cfdd5b93264d..84c284eb544a 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c -@@ -229,7 +229,7 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, +@@ -227,15 +227,19 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, * Only use with rt_mutex_waiter_{less,equal}() */ #define task_to_waiter(p) \ @@ -864,7 +729,54 @@ index c9f090d64f00..063d15a1ab8b 100644 static inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left, -@@ -680,7 +680,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, + struct rt_mutex_waiter *right) + { ++#ifdef CONFIG_SCHED_PDS ++ return (left->deadline < right->deadline); ++#else + if (left->prio < right->prio) + return 1; + ++#ifndef CONFIG_SCHED_BMQ + /* + * If both waiters have dl_prio(), we check the deadlines of the + * associated tasks. +@@ -244,17 +248,23 @@ rt_mutex_waiter_less(struct rt_mutex_waiter *left, + */ + if (dl_prio(left->prio)) + return dl_time_before(left->deadline, right->deadline); ++#endif + + return 0; ++#endif + } + + static inline int + rt_mutex_waiter_equal(struct rt_mutex_waiter *left, + struct rt_mutex_waiter *right) + { ++#ifdef CONFIG_SCHED_PDS ++ return (left->deadline == right->deadline); ++#else + if (left->prio != right->prio) + return 0; + ++#ifndef CONFIG_SCHED_BMQ + /* + * If both waiters have dl_prio(), we check the deadlines of the + * associated tasks. 
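The rtmutex hunks around here change how two lock waiters are ordered: under PDS the comparison is purely by deadline, under BMQ purely by prio (the dl_prio() branch is compiled out), and otherwise by prio with an earliest-deadline tie-break that the kernel applies only to deadline-class tasks. A small stand-alone sketch of the three orderings, with illustrative types only and not the kernel structures:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for the ordering fields of a rt_mutex waiter. */
struct waiter {
	int prio;			/* lower value = higher priority */
	unsigned long long deadline;	/* earlier = more urgent */
};

/* PDS-style ordering: deadline only, as in the CONFIG_SCHED_PDS branch. */
static bool waiter_less_pds(const struct waiter *l, const struct waiter *r)
{
	return l->deadline < r->deadline;
}

/* BMQ-style ordering: prio only, the deadline check being compiled out. */
static bool waiter_less_bmq(const struct waiter *l, const struct waiter *r)
{
	return l->prio < r->prio;
}

/* Default-style ordering: prio first, deadline as a tie-break
 * (in the kernel the tie-break applies only to SCHED_DEADLINE tasks). */
static bool waiter_less_default(const struct waiter *l, const struct waiter *r)
{
	if (l->prio != r->prio)
		return l->prio < r->prio;
	return l->deadline < r->deadline;
}

int main(void)
{
	struct waiter a = { .prio = 120, .deadline = 100 };
	struct waiter b = { .prio = 110, .deadline = 200 };

	printf("PDS:     a before b? %d\n", waiter_less_pds(&a, &b));	/* 1: earlier deadline wins */
	printf("BMQ:     a before b? %d\n", waiter_less_bmq(&a, &b));	/* 0: lower prio value wins */
	printf("default: a before b? %d\n", waiter_less_default(&a, &b));
	return 0;
}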
+@@ -263,8 +273,10 @@ rt_mutex_waiter_equal(struct rt_mutex_waiter *left, + */ + if (dl_prio(left->prio)) + return left->deadline == right->deadline; ++#endif + + return 1; ++#endif + } + + static void +@@ -678,7 +690,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, * the values of the node being removed. */ waiter->prio = task->prio; @@ -873,7 +785,7 @@ index c9f090d64f00..063d15a1ab8b 100644 rt_mutex_enqueue(lock, waiter); -@@ -953,7 +953,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, +@@ -951,7 +963,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, waiter->task = task; waiter->lock = lock; waiter->prio = task->prio; @@ -883,10 +795,10 @@ index c9f090d64f00..063d15a1ab8b 100644 /* Get the top priority waiter on the lock */ if (rt_mutex_has_waiters(lock)) diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile -index 21fb5a5662b5..8ebe4e33fb5f 100644 +index 5fc9c9b70862..eb6d7d87779f 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile -@@ -16,15 +16,21 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) +@@ -22,14 +22,20 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer endif @@ -895,171 +807,34 @@ index 21fb5a5662b5..8ebe4e33fb5f 100644 -obj-y += wait.o wait_bit.o swait.o completion.o - -obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o -+ifdef CONFIG_SCHED_PDS -+obj-y += pds.o ++ifdef CONFIG_SCHED_ALT ++obj-y += alt_core.o alt_debug.o +else +obj-y += core.o +obj-y += fair.o rt.o deadline.o -+obj-$(CONFIG_SMP) += cpudeadline.o topology.o stop_task.o ++obj-$(CONFIG_SMP) += cpudeadline.o stop_task.o obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o -obj-$(CONFIG_SCHEDSTATS) += stats.o obj-$(CONFIG_SCHED_DEBUG) += debug.o - obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o +endif +obj-y += loadavg.o clock.o cputime.o +obj-y += idle.o +obj-y += wait.o wait_bit.o swait.o completion.o -+obj-$(CONFIG_SMP) += cpupri.o pelt.o ++obj-$(CONFIG_SMP) += cpupri.o pelt.o topology.o +obj-$(CONFIG_SCHEDSTATS) += stats.o + obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o obj-$(CONFIG_CPU_FREQ) += cpufreq.o obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o - obj-$(CONFIG_MEMBARRIER) += membarrier.o -diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c -index 7fbaee24c824..28377ad56248 100644 ---- a/kernel/sched/cpufreq_schedutil.c -+++ b/kernel/sched/cpufreq_schedutil.c -@@ -183,6 +183,7 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, - return cpufreq_driver_resolve_freq(policy, freq); - } - -+#ifndef CONFIG_SCHED_PDS - /* - * This function computes an effective utilization for the given CPU, to be - * used for frequency selection given the linear relation: f = u * f_max. -@@ -300,6 +301,13 @@ static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) - - return schedutil_cpu_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL, NULL); - } -+#else /* CONFIG_SCHED_PDS */ -+static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) -+{ -+ sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu); -+ return sg_cpu->max; -+} -+#endif - - /** - * sugov_iowait_reset() - Reset the IO boost status of a CPU. 
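The cpufreq_schedutil.c hunks being dropped above were the old PDS glue: its sugov_get_util() simply reported the CPU's full capacity, which, through the linear relation f = u * f_max quoted in the surrounding comment, effectively biases frequency selection toward the maximum. A stand-alone user-space sketch of that relation follows; the names and numbers are illustrative, not taken from the patch.

#include <stdio.h>

/*
 * Toy version of the linear frequency selection rule f = u * f_max,
 * with utilization scaled against the CPU's capacity.  Purely
 * illustrative, not the kernel implementation.
 */
static unsigned int next_freq(unsigned long util, unsigned long max_cap,
			      unsigned int max_freq)
{
	if (util > max_cap)
		util = max_cap;
	return (unsigned int)((unsigned long long)max_freq * util / max_cap);
}

int main(void)
{
	unsigned long max_cap = 1024;	/* arch_scale_cpu_capacity()-style scale */
	unsigned int max_freq = 3600000;	/* kHz, illustrative */

	/* A half-loaded CPU asks for roughly half the maximum frequency. */
	printf("util 512  -> %u kHz\n", next_freq(512, max_cap, max_freq));

	/* The removed PDS override reported util == max, i.e. always f_max. */
	printf("util 1024 -> %u kHz\n", next_freq(1024, max_cap, max_freq));
	return 0;
}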
-@@ -443,7 +451,9 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } - */ - static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy) - { -+#ifndef CONFIG_SCHED_PDS - if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl) -+#endif - sg_policy->limits_changed = true; - } - -@@ -686,6 +696,7 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) - } - - ret = sched_setattr_nocheck(thread, &attr); -+ - if (ret) { - kthread_stop(thread); - pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__); -@@ -916,6 +927,7 @@ static int __init sugov_register(void) - core_initcall(sugov_register); - - #ifdef CONFIG_ENERGY_MODEL -+#ifndef CONFIG_SCHED_PDS - extern bool sched_energy_update; - extern struct mutex sched_energy_mutex; - -@@ -946,4 +958,10 @@ void sched_cpufreq_governor_change(struct cpufreq_policy *policy, - } - - } -+#else /* CONFIG_SCHED_PDS */ -+void sched_cpufreq_governor_change(struct cpufreq_policy *policy, -+ struct cpufreq_governor *old_gov) -+{ -+} -+#endif - #endif -diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c -index ff9435dee1df..1377ea3d1b76 100644 ---- a/kernel/sched/cputime.c -+++ b/kernel/sched/cputime.c -@@ -122,7 +122,12 @@ void account_user_time(struct task_struct *p, u64 cputime) - p->utime += cputime; - account_group_user_time(p, cputime); - -+#ifdef CONFIG_SCHED_PDS -+ index = (task_nice(p) > 0 || task_running_idle(p)) ? CPUTIME_NICE : -+ CPUTIME_USER; -+#else - index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; -+#endif - - /* Add user time to cpustat. */ - task_group_account_field(p, index, cputime); -@@ -146,7 +151,11 @@ void account_guest_time(struct task_struct *p, u64 cputime) - p->gtime += cputime; - - /* Add guest time to cpustat. */ -+#ifdef CONFIG_SCHED_PDS -+ if (task_nice(p) > 0 || task_running_idle(p)) { -+#else - if (task_nice(p) > 0) { -+#endif - cpustat[CPUTIME_NICE] += cputime; - cpustat[CPUTIME_GUEST_NICE] += cputime; - } else { -@@ -269,7 +278,7 @@ static inline u64 account_other_time(u64 max) - #ifdef CONFIG_64BIT - static inline u64 read_sum_exec_runtime(struct task_struct *t) - { -- return t->se.sum_exec_runtime; -+ return tsk_seruntime(t); - } - #else - static u64 read_sum_exec_runtime(struct task_struct *t) -@@ -279,7 +288,7 @@ static u64 read_sum_exec_runtime(struct task_struct *t) - struct rq *rq; - - rq = task_rq_lock(t, &rf); -- ns = t->se.sum_exec_runtime; -+ ns = tsk_seruntime(t); - task_rq_unlock(rq, t, &rf); - - return ns; -@@ -658,7 +667,7 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, - void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) - { - struct task_cputime cputime = { -- .sum_exec_runtime = p->se.sum_exec_runtime, -+ .sum_exec_runtime = tsk_seruntime(p), - }; - - task_cputime(p, &cputime.utime, &cputime.stime); -diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c -index b743bf38f08f..16e5754af1cf 100644 ---- a/kernel/sched/idle.c -+++ b/kernel/sched/idle.c -@@ -361,6 +361,7 @@ void cpu_startup_entry(enum cpuhp_state state) - do_idle(); - } - -+#ifndef CONFIG_SCHED_PDS - /* - * idle-task scheduling class. 
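The cputime.c hunks removed above show how the old PDS patch widened the "nice" accounting bucket: time was charged to CPUTIME_NICE not only when task_nice(p) > 0 but also when the task was running at idle priority. A stand-alone sketch of that classification, with illustrative field names only:

#include <stdbool.h>
#include <stdio.h>

enum cputime_index { CPUTIME_USER, CPUTIME_NICE };

struct task {
	int nice;		/* -20..19 */
	bool running_idle;	/* stand-in for task_running_idle(p) */
};

/* Mainline rule: only positively-niced tasks land in CPUTIME_NICE. */
static enum cputime_index classify_mainline(const struct task *p)
{
	return p->nice > 0 ? CPUTIME_NICE : CPUTIME_USER;
}

/* Old PDS rule from the removed hunk: idle-priority tasks count as NICE too. */
static enum cputime_index classify_pds(const struct task *p)
{
	return (p->nice > 0 || p->running_idle) ? CPUTIME_NICE : CPUTIME_USER;
}

int main(void)
{
	struct task batch = { .nice = 0, .running_idle = true };

	printf("mainline: %s\n",
	       classify_mainline(&batch) == CPUTIME_NICE ? "NICE" : "USER");
	printf("old PDS : %s\n",
	       classify_pds(&batch) == CPUTIME_NICE ? "NICE" : "USER");
	return 0;
}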
- */ -@@ -481,3 +482,4 @@ const struct sched_class idle_sched_class = { - .switched_to = switched_to_idle, - .update_curr = update_curr_idle, - }; -+#endif -diff --git a/kernel/sched/pds.c b/kernel/sched/pds.c +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c new file mode 100644 -index 000000000000..02d7d5a67c77 +index 000000000000..f36264fea75c --- /dev/null -+++ b/kernel/sched/pds.c -@@ -0,0 +1,6611 @@ ++++ b/kernel/sched/alt_core.c +@@ -0,0 +1,6360 @@ +/* -+ * kernel/sched/pds.c, was kernel/sched.c ++ * kernel/sched/alt_core.c + * -+ * PDS-mq Core kernel scheduler code and related syscalls ++ * Core alternative kernel scheduler code and related syscalls + * + * Copyright (C) 1991-2002 Linus Torvalds + * @@ -1067,8 +842,9 @@ index 000000000000..02d7d5a67c77 + * a whole lot of those previous things. + * 2017-09-06 Priority and Deadline based Skip list multiple queue kernel + * scheduler by Alfred Chen. ++ * 2019-02-20 BMQ(BitMap Queue) kernel scheduler by Alfred Chen. + */ -+#include "pds_sched.h" ++#include "sched.h" + +#include + @@ -1088,6 +864,7 @@ index 000000000000..02d7d5a67c77 +#include + +#include ++#include + +#include + @@ -1101,176 +878,59 @@ index 000000000000..02d7d5a67c77 +#define CREATE_TRACE_POINTS +#include + ++#define ALT_SCHED_VERSION "v5.9-r0" + -+#define rt_prio(prio) ((prio) < MAX_RT_PRIO) ++/* rt_prio(prio) defined in include/linux/sched/rt.h */ +#define rt_task(p) rt_prio((p)->prio) -+#define rt_policy(policy) ((policy) == SCHED_FIFO || \ -+ (policy) == SCHED_RR || \ -+ (policy) == SCHED_ISO) ++#define rt_policy(policy) ((policy) == SCHED_FIFO || (policy) == SCHED_RR) +#define task_has_rt_policy(p) (rt_policy((p)->policy)) + -+#define idle_policy(policy) ((policy) == SCHED_IDLE) -+#define idleprio_task(p) unlikely(idle_policy((p)->policy)) -+ +#define STOP_PRIO (MAX_RT_PRIO - 1) + -+/* -+ * Some helpers for converting to/from various scales. Use shifts to get -+ * approximate multiples of ten for less overhead. -+ */ -+#define JIFFIES_TO_NS(TIME) ((TIME) * (1000000000 / HZ)) -+#define JIFFY_NS (1000000000 / HZ) -+#define HALF_JIFFY_NS (1000000000 / HZ / 2) -+#define HALF_JIFFY_US (1000000 / HZ / 2) -+#define MS_TO_NS(TIME) ((TIME) << 20) -+#define MS_TO_US(TIME) ((TIME) << 10) -+#define NS_TO_MS(TIME) ((TIME) >> 20) -+#define NS_TO_US(TIME) ((TIME) >> 10) -+#define US_TO_NS(TIME) ((TIME) << 10) ++/* Default time slice is 4 in ms, can be set via kernel parameter "sched_timeslice" */ ++u64 sched_timeslice_ns __read_mostly = (4 * 1000 * 1000); + -+#define RESCHED_US (100) /* Reschedule if less than this many μs left */ -+ -+enum { -+ BASE_CPU_AFFINITY_CHK_LEVEL = 1, -+#ifdef CONFIG_SCHED_SMT -+ SMT_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, -+#endif -+#ifdef CONFIG_SCHED_MC -+ MC_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, -+#endif -+ NR_CPU_AFFINITY_CHK_LEVEL -+}; -+ -+static inline void print_scheduler_version(void) ++static int __init sched_timeslice(char *str) +{ -+ printk(KERN_INFO "pds: PDS-mq CPU Scheduler 0.99o by Alfred Chen and kept alive artificially by Tk-Glitch.\n"); ++ int timeslice_us; ++ ++ get_option(&str, ×lice_us); ++ if (timeslice_us >= 1000) ++ sched_timeslice_ns = timeslice_us * 1000; ++ ++ return 0; +} ++early_param("sched_timeslice", sched_timeslice); + -+/* -+ * This is the time all tasks within the same priority round robin. -+ * Value is in ms and set to a minimum of 6ms. Scales with number of cpus. -+ * Tunable via /proc interface. 
-+ */ -+#define SCHED_DEFAULT_RR (4) -+int rr_interval __read_mostly = SCHED_DEFAULT_RR; -+ -+static int __init rr_interval_set(char *str) -+{ -+ u32 rr; -+ -+ pr_info("rr_interval: "); -+ if (kstrtouint(str, 0, &rr)) { -+ pr_cont("using default of %u, unable to parse %s\n", -+ rr_interval, str); -+ return 1; -+ } -+ -+ rr_interval = rr; -+ pr_cont("%d\n", rr_interval); -+ -+ return 1; -+} -+__setup("rr_interval=", rr_interval_set); -+ -+ -+static const u64 sched_prio2deadline[NICE_WIDTH] = { -+/* -20 */ 6291456, 6920601, 7612661, 8373927, 9211319, -+/* -15 */ 10132450, 11145695, 12260264, 13486290, 14834919, -+/* -10 */ 16318410, 17950251, 19745276, 21719803, 23891783, -+/* -5 */ 26280961, 28909057, 31799962, 34979958, 38477953, -+/* 0 */ 42325748, 46558322, 51214154, 56335569, 61969125, -+/* 5 */ 68166037, 74982640, 82480904, 90728994, 99801893, -+/* 10 */ 109782082, 120760290, 132836319, 146119950, 160731945, -+/* 15 */ 176805139, 194485652, 213934217, 235327638, 258860401 -+}; ++/* Reschedule if less than this many μs left */ ++#define RESCHED_NS (100 * 1000) + +/** + * sched_yield_type - Choose what sort of yield sched_yield will perform. + * 0: No yield. -+ * 1: Yield only to better priority/deadline tasks. (default) -+ * 2: Expire timeslice and recalculate deadline. ++ * 1: Deboost and requeue task. (default) ++ * 2: Set rq skip task. + */ +int sched_yield_type __read_mostly = 1; + -+/* -+ * The quota handed out to tasks of all priority levels when refilling their -+ * time_slice. -+ */ -+static inline int timeslice(void) -+{ -+ return MS_TO_US(rr_interval); -+} -+ +#ifdef CONFIG_SMP -+enum { -+SCHED_RQ_EMPTY = 0, -+SCHED_RQ_IDLE, -+SCHED_RQ_NORMAL_0, -+SCHED_RQ_NORMAL_1, -+SCHED_RQ_NORMAL_2, -+SCHED_RQ_NORMAL_3, -+SCHED_RQ_NORMAL_4, -+SCHED_RQ_NORMAL_5, -+SCHED_RQ_NORMAL_6, -+SCHED_RQ_NORMAL_7, -+SCHED_RQ_ISO, -+SCHED_RQ_RT, -+NR_SCHED_RQ_QUEUED_LEVEL -+}; ++static cpumask_t sched_rq_pending_mask ____cacheline_aligned_in_smp; + -+static cpumask_t sched_rq_queued_masks[NR_SCHED_RQ_QUEUED_LEVEL] -+____cacheline_aligned_in_smp; -+ -+static DECLARE_BITMAP(sched_rq_queued_masks_bitmap, NR_SCHED_RQ_QUEUED_LEVEL) -+____cacheline_aligned_in_smp; -+ -+static cpumask_t sched_rq_pending_masks[NR_SCHED_RQ_QUEUED_LEVEL] -+____cacheline_aligned_in_smp; -+ -+static DECLARE_BITMAP(sched_rq_pending_masks_bitmap, NR_SCHED_RQ_QUEUED_LEVEL) -+____cacheline_aligned_in_smp; -+ -+DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_chk_masks); -+DEFINE_PER_CPU(cpumask_t *, sched_cpu_llc_start_mask); -+DEFINE_PER_CPU(cpumask_t *, sched_cpu_affinity_chk_end_masks); ++DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); ++DEFINE_PER_CPU(cpumask_t *, sched_cpu_affinity_end_mask); ++DEFINE_PER_CPU(cpumask_t *, sched_cpu_llc_mask); + +#ifdef CONFIG_SCHED_SMT -+DEFINE_PER_CPU(int, sched_sibling_cpu); +DEFINE_STATIC_KEY_FALSE(sched_smt_present); +EXPORT_SYMBOL_GPL(sched_smt_present); -+ -+static cpumask_t sched_cpu_sg_idle_mask ____cacheline_aligned_in_smp; -+ -+#ifdef CONFIG_SMT_NICE -+/* -+ * Preemptible sibling group mask -+ * Which all sibling cpus are running at PRIO_LIMIT or IDLE_PRIO -+ */ -+static cpumask_t sched_cpu_psg_mask ____cacheline_aligned_in_smp; -+/* -+ * SMT supressed mask -+ * When a cpu is running task with NORMAL/ISO/RT policy, its sibling cpu -+ * will be supressed to run IDLE priority task. 
-+ */ -+static cpumask_t sched_smt_supressed_mask ____cacheline_aligned_in_smp; -+#endif /* CONFIG_SMT_NICE */ +#endif + -+static int sched_rq_prio[NR_CPUS] ____cacheline_aligned; -+ +/* + * Keep a unique ID per domain (we use the first CPUs number in the cpumask of + * the domain), this allows us to quickly tell if two cpus are in the same cache + * domain, see cpus_share_cache(). + */ +DEFINE_PER_CPU(int, sd_llc_id); -+ -+int __weak arch_sd_sibling_asym_packing(void) -+{ -+ return 0*SD_ASYM_PACKING; -+} -+#else -+struct rq *uprq; +#endif /* CONFIG_SMP */ + +static DEFINE_MUTEX(sched_hotcpu_mutex); @@ -1284,6 +944,166 @@ index 000000000000..02d7d5a67c77 +# define finish_arch_post_lock_switch() do { } while (0) +#endif + ++#define IDLE_WM (IDLE_TASK_SCHED_PRIO) ++ ++#ifdef CONFIG_SCHED_SMT ++static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; ++#endif ++static cpumask_t sched_rq_watermark[SCHED_BITS] ____cacheline_aligned_in_smp; ++ ++#ifdef CONFIG_SCHED_BMQ ++#include "bmq_imp.h" ++#endif ++#ifdef CONFIG_SCHED_PDS ++#include "pds_imp.h" ++#endif ++ ++static inline void update_sched_rq_watermark(struct rq *rq) ++{ ++ unsigned long watermark = sched_queue_watermark(rq); ++ unsigned long last_wm = rq->watermark; ++ unsigned long i; ++ int cpu; ++ ++ /*printk(KERN_INFO "sched: watermark(%d) %d, last %d\n", ++ cpu_of(rq), watermark, last_wm);*/ ++ if (watermark == last_wm) ++ return; ++ ++ rq->watermark = watermark; ++ cpu = cpu_of(rq); ++ if (watermark < last_wm) { ++ for (i = watermark + 1; i <= last_wm; i++) ++ cpumask_andnot(&sched_rq_watermark[i], ++ &sched_rq_watermark[i], cpumask_of(cpu)); ++#ifdef CONFIG_SCHED_SMT ++ if (!static_branch_likely(&sched_smt_present)) ++ return; ++ if (IDLE_WM == last_wm) ++ cpumask_andnot(&sched_sg_idle_mask, ++ &sched_sg_idle_mask, cpu_smt_mask(cpu)); ++#endif ++ return; ++ } ++ /* last_wm < watermark */ ++ for (i = last_wm + 1; i <= watermark; i++) ++ cpumask_set_cpu(cpu, &sched_rq_watermark[i]); ++#ifdef CONFIG_SCHED_SMT ++ if (!static_branch_likely(&sched_smt_present)) ++ return; ++ if (IDLE_WM == watermark) { ++ cpumask_t tmp; ++ cpumask_and(&tmp, cpu_smt_mask(cpu), &sched_rq_watermark[IDLE_WM]); ++ if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) ++ cpumask_or(&sched_sg_idle_mask, cpu_smt_mask(cpu), ++ &sched_sg_idle_mask); ++ } ++#endif ++} ++ ++static inline struct task_struct *rq_runnable_task(struct rq *rq) ++{ ++ struct task_struct *next = sched_rq_first_task(rq); ++ ++ if (unlikely(next == rq->skip)) ++ next = sched_rq_next_task(next, rq); ++ ++ return next; ++} ++ ++/* ++ * Serialization rules: ++ * ++ * Lock order: ++ * ++ * p->pi_lock ++ * rq->lock ++ * hrtimer_cpu_base->lock (hrtimer_start() for bandwidth controls) ++ * ++ * rq1->lock ++ * rq2->lock where: rq1 < rq2 ++ * ++ * Regular state: ++ * ++ * Normal scheduling state is serialized by rq->lock. __schedule() takes the ++ * local CPU's rq->lock, it optionally removes the task from the runqueue and ++ * always looks at the local rq data structures to find the most elegible task ++ * to run next. ++ * ++ * Task enqueue is also under rq->lock, possibly taken from another CPU. ++ * Wakeups from another LLC domain might use an IPI to transfer the enqueue to ++ * the local CPU to avoid bouncing the runqueue state around [ see ++ * ttwu_queue_wakelist() ] ++ * ++ * Task wakeup, specifically wakeups that involve migration, are horribly ++ * complicated to avoid having to take two rq->locks. 
++ * ++ * Special state: ++ * ++ * System-calls and anything external will use task_rq_lock() which acquires ++ * both p->pi_lock and rq->lock. As a consequence the state they change is ++ * stable while holding either lock: ++ * ++ * - sched_setaffinity()/ ++ * set_cpus_allowed_ptr(): p->cpus_ptr, p->nr_cpus_allowed ++ * - set_user_nice(): p->se.load, p->*prio ++ * - __sched_setscheduler(): p->sched_class, p->policy, p->*prio, ++ * p->se.load, p->rt_priority, ++ * p->dl.dl_{runtime, deadline, period, flags, bw, density} ++ * - sched_setnuma(): p->numa_preferred_nid ++ * - sched_move_task()/ ++ * cpu_cgroup_fork(): p->sched_task_group ++ * - uclamp_update_active() p->uclamp* ++ * ++ * p->state <- TASK_*: ++ * ++ * is changed locklessly using set_current_state(), __set_current_state() or ++ * set_special_state(), see their respective comments, or by ++ * try_to_wake_up(). This latter uses p->pi_lock to serialize against ++ * concurrent self. ++ * ++ * p->on_rq <- { 0, 1 = TASK_ON_RQ_QUEUED, 2 = TASK_ON_RQ_MIGRATING }: ++ * ++ * is set by activate_task() and cleared by deactivate_task(), under ++ * rq->lock. Non-zero indicates the task is runnable, the special ++ * ON_RQ_MIGRATING state is used for migration without holding both ++ * rq->locks. It indicates task_cpu() is not stable, see task_rq_lock(). ++ * ++ * p->on_cpu <- { 0, 1 }: ++ * ++ * is set by prepare_task() and cleared by finish_task() such that it will be ++ * set before p is scheduled-in and cleared after p is scheduled-out, both ++ * under rq->lock. Non-zero indicates the task is running on its CPU. ++ * ++ * [ The astute reader will observe that it is possible for two tasks on one ++ * CPU to have ->on_cpu = 1 at the same time. ] ++ * ++ * task_cpu(p): is changed by set_task_cpu(), the rules are: ++ * ++ * - Don't call set_task_cpu() on a blocked task: ++ * ++ * We don't care what CPU we're not running on, this simplifies hotplug, ++ * the CPU assignment of blocked tasks isn't required to be valid. ++ * ++ * - for try_to_wake_up(), called under p->pi_lock: ++ * ++ * This allows try_to_wake_up() to only take one rq->lock, see its comment. ++ * ++ * - for migration called under rq->lock: ++ * [ see task_on_rq_migrating() in task_rq_lock() ] ++ * ++ * o move_queued_task() ++ * o detach_task() ++ * ++ * - for migration called under double_rq_lock(): ++ * ++ * o __migrate_swap_task() ++ * o push_rt_task() / pull_rt_task() ++ * o push_dl_task() / pull_dl_task() ++ * o dl_task_offline_migration() ++ * ++ */ ++ +/* + * Context: p->pi_lock + */ @@ -1420,6 +1240,20 @@ index 000000000000..02d7d5a67c77 + } +} + ++static inline void ++rq_lock_irqsave(struct rq *rq, struct rq_flags *rf) ++ __acquires(rq->lock) ++{ ++ raw_spin_lock_irqsave(&rq->lock, rf->flags); ++} ++ ++static inline void ++rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf) ++ __releases(rq->lock) ++{ ++ raw_spin_unlock_irqrestore(&rq->lock, rf->flags); ++} ++ +/* + * RQ-clock updating methods: + */ @@ -1465,7 +1299,6 @@ index 000000000000..02d7d5a67c77 + steal = delta; + + rq->prev_steal_time_rq += steal; -+ + delta -= steal; + } +#endif @@ -1488,82 +1321,85 @@ index 000000000000..02d7d5a67c77 + update_rq_clock_task(rq, delta); +} + -+static inline void update_task_priodl(struct task_struct *p) ++#ifdef CONFIG_NO_HZ_FULL ++/* ++ * Tick may be needed by tasks in the runqueue depending on their policy and ++ * requirements. If tick is needed, lets send the target an IPI to kick it out ++ * of nohz mode if necessary. 
++ */ ++static inline void sched_update_tick_dependency(struct rq *rq) +{ -+ p->priodl = (((u64) (p->prio))<<56) | ((p->deadline)>>8); ++ int cpu = cpu_of(rq); ++ ++ if (!tick_nohz_full_cpu(cpu)) ++ return; ++ ++ if (rq->nr_running < 2) ++ tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); ++ else ++ tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); +} ++#else /* !CONFIG_NO_HZ_FULL */ ++static inline void sched_update_tick_dependency(struct rq *rq) { } ++#endif + +/* -+ * Deadline is "now" in niffies + (offset by priority). Setting the deadline -+ * is the key to everything. It distributes CPU fairly amongst tasks of the -+ * same nice value, it proportions CPU according to nice level, it means the -+ * task that last woke up the longest ago has the earliest deadline, thus -+ * ensuring that interactive tasks get low latency on wake up. The CPU -+ * proportion works out to the square of the virtual deadline difference, so -+ * this equation will give nice 19 3% CPU compared to nice 0. ++ * Add/Remove/Requeue task to/from the runqueue routines ++ * Context: rq->lock + */ -+static inline u64 task_deadline_diff(const struct task_struct *p) ++static inline void dequeue_task(struct task_struct *p, struct rq *rq, int flags) +{ -+ return sched_prio2deadline[TASK_USER_PRIO(p)]; ++ lockdep_assert_held(&rq->lock); ++ ++ /*printk(KERN_INFO "sched: dequeue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ ++ WARN_ONCE(task_rq(p) != rq, "sched: dequeue task reside on cpu%d from cpu%d\n", ++ task_cpu(p), cpu_of(rq)); ++ ++ __SCHED_DEQUEUE_TASK(p, rq, flags, update_sched_rq_watermark(rq)); ++ --rq->nr_running; ++#ifdef CONFIG_SMP ++ if (1 == rq->nr_running) ++ cpumask_clear_cpu(cpu_of(rq), &sched_rq_pending_mask); ++#endif ++ ++ sched_update_tick_dependency(rq); +} + -+static inline u64 static_deadline_diff(int static_prio) ++static inline void enqueue_task(struct task_struct *p, struct rq *rq, int flags) +{ -+ return sched_prio2deadline[USER_PRIO(static_prio)]; ++ lockdep_assert_held(&rq->lock); ++ ++ /*printk(KERN_INFO "sched: enqueue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ ++ WARN_ONCE(task_rq(p) != rq, "sched: enqueue task reside on cpu%d to cpu%d\n", ++ task_cpu(p), cpu_of(rq)); ++ ++ __SCHED_ENQUEUE_TASK(p, rq, flags); ++ update_sched_rq_watermark(rq); ++ ++rq->nr_running; ++#ifdef CONFIG_SMP ++ if (2 == rq->nr_running) ++ cpumask_set_cpu(cpu_of(rq), &sched_rq_pending_mask); ++#endif ++ ++ sched_update_tick_dependency(rq); ++ ++ /* ++ * If in_iowait is set, the code below may not trigger any cpufreq ++ * utilization updates, so do it here explicitly with the IOWAIT flag ++ * passed. ++ */ ++ if (p->in_iowait) ++ cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT); +} + -+/* -+ * The time_slice is only refilled when it is empty and that is when we set a -+ * new deadline for non-rt tasks. 
-+ */ -+static inline void time_slice_expired(struct task_struct *p, struct rq *rq) ++static inline void requeue_task(struct task_struct *p, struct rq *rq) +{ -+ p->time_slice = timeslice(); -+ if (p->prio >= NORMAL_PRIO) -+ p->deadline = rq->clock + task_deadline_diff(p); ++ lockdep_assert_held(&rq->lock); ++ /*printk(KERN_INFO "sched: requeue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ ++ WARN_ONCE(task_rq(p) != rq, "sched: cpu[%d] requeue task reside on cpu%d\n", ++ cpu_of(rq), task_cpu(p)); + -+ update_task_priodl(p); -+} -+ -+static inline struct task_struct *rq_first_queued_task(struct rq *rq) -+{ -+ struct skiplist_node *node = rq->sl_header.next[0]; -+ -+ if (node == &rq->sl_header) -+ return rq->idle; -+ -+ return skiplist_entry(node, struct task_struct, sl_node); -+} -+ -+static inline struct task_struct *rq_second_queued_task(struct rq *rq) -+{ -+ struct skiplist_node *node = rq->sl_header.next[0]->next[0]; -+ -+ if (node == &rq->sl_header) -+ return rq->idle; -+ -+ return skiplist_entry(node, struct task_struct, sl_node); -+} -+ -+static inline int is_second_in_rq(struct task_struct *p, struct rq *rq) -+{ -+ return (p->sl_node.prev[0]->prev[0] == &rq->sl_header); -+} -+ -+static const int task_dl_hash_tbl[] = { -+/* 0 4 8 12 */ -+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, -+/* 16 20 24 28 */ -+ 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 6, 7 -+}; -+ -+static inline int -+task_deadline_level(const struct task_struct *p, const struct rq *rq) -+{ -+ u64 delta = (rq->clock + sched_prio2deadline[39] - p->deadline) >> 23; -+ -+ delta = min((size_t)delta, ARRAY_SIZE(task_dl_hash_tbl) - 1); -+ return task_dl_hash_tbl[delta]; ++ __SCHED_REQUEUE_TASK(p, rq, update_sched_rq_watermark(rq)); +} + +/* @@ -1600,7 +1436,7 @@ index 000000000000..02d7d5a67c77 + * Atomically set TIF_NEED_RESCHED if TIF_POLLING_NRFLAG is set. + * + * If this returns true, then the idle task promises to call -+ * flush_smp_call_function_from_idle() and reschedule soon. ++ * sched_ttwu_pending() and reschedule soon. + */ +static bool set_nr_if_polling(struct task_struct *p) +{ @@ -1635,335 +1471,91 @@ index 000000000000..02d7d5a67c77 +#endif +#endif + -+#ifdef CONFIG_SMP -+#ifdef CONFIG_SMT_NICE -+static void resched_cpu_if_curr_is(int cpu, int priority) ++static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) +{ -+ struct rq *rq = cpu_rq(cpu); ++ struct wake_q_node *node = &task->wake_q; + -+ rcu_read_lock(); -+ -+ if (rcu_dereference(rq->curr)->prio != priority) -+ goto out; -+ -+ if (set_nr_if_polling(rq->idle)) { -+ trace_sched_wake_idle_without_ipi(cpu); -+ } else { -+ if (!do_raw_spin_trylock(&rq->lock)) -+ goto out; -+ spin_acquire(&rq->lock.dep_map, SINGLE_DEPTH_NESTING, 1, _RET_IP_); -+ -+ if (priority == rq->curr->prio) -+ smp_send_reschedule(cpu); -+ /* Else CPU is not idle, do nothing here */ -+ -+ spin_release(&rq->lock.dep_map, _RET_IP_); -+ do_raw_spin_unlock(&rq->lock); -+ } -+ -+out: -+ rcu_read_unlock(); -+} -+#endif /* CONFIG_SMT_NICE */ -+ -+static inline bool -+__update_cpumasks_bitmap(int cpu, unsigned long *plevel, unsigned long level, -+ cpumask_t cpumasks[], unsigned long bitmap[]) -+{ -+ if (*plevel == level) ++ /* ++ * Atomically grab the task, if ->wake_q is !nil already it means ++ * its already queued (either by us or someone else) and will get the ++ * wakeup due to that. ++ * ++ * In order to ensure that a pending wakeup will observe our pending ++ * state, even in the failed case, an explicit smp_mb() must be used. 
++ */ ++ smp_mb__before_atomic(); ++ if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))) + return false; + -+ cpumask_clear_cpu(cpu, cpumasks + *plevel); -+ if (cpumask_empty(cpumasks + *plevel)) -+ clear_bit(*plevel, bitmap); -+ cpumask_set_cpu(cpu, cpumasks + level); -+ set_bit(level, bitmap); -+ -+ *plevel = level; -+ ++ /* ++ * The head is context local, there can be no concurrency. ++ */ ++ *head->lastp = node; ++ head->lastp = &node->next; + return true; +} + -+static inline int -+task_running_policy_level(const struct task_struct *p, const struct rq *rq) -+{ -+ int prio = p->prio; -+ -+ if (NORMAL_PRIO == prio) -+ return SCHED_RQ_NORMAL_0 + task_deadline_level(p, rq); -+ -+ if (ISO_PRIO == prio) -+ return SCHED_RQ_ISO; -+ if (prio < MAX_RT_PRIO) -+ return SCHED_RQ_RT; -+ return PRIO_LIMIT - prio; -+} -+ -+static inline void update_sched_rq_queued_masks_normal(struct rq *rq) -+{ -+ struct task_struct *p = rq_first_queued_task(rq); -+ -+ if (p->prio != NORMAL_PRIO) -+ return; -+ -+ __update_cpumasks_bitmap(cpu_of(rq), &rq->queued_level, -+ task_running_policy_level(p, rq), -+ &sched_rq_queued_masks[0], -+ &sched_rq_queued_masks_bitmap[0]); -+} -+ -+#ifdef CONFIG_SMT_NICE -+static inline void update_sched_cpu_psg_mask(const int cpu) -+{ -+ cpumask_t tmp; -+ -+ cpumask_or(&tmp, &sched_rq_queued_masks[SCHED_RQ_EMPTY], -+ &sched_rq_queued_masks[SCHED_RQ_IDLE]); -+ cpumask_and(&tmp, &tmp, cpu_smt_mask(cpu)); -+ if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) -+ cpumask_or(&sched_cpu_psg_mask, &sched_cpu_psg_mask, -+ cpu_smt_mask(cpu)); -+ else -+ cpumask_andnot(&sched_cpu_psg_mask, &sched_cpu_psg_mask, -+ cpu_smt_mask(cpu)); -+} -+#endif -+ -+static inline void update_sched_rq_queued_masks(struct rq *rq) -+{ -+ int cpu = cpu_of(rq); -+ struct task_struct *p = rq_first_queued_task(rq); -+ unsigned long level; -+#ifdef CONFIG_SCHED_SMT -+ unsigned long last_level = rq->queued_level; -+#endif -+ -+ level = task_running_policy_level(p, rq); -+ sched_rq_prio[cpu] = p->prio; -+ -+ if (!__update_cpumasks_bitmap(cpu, &rq->queued_level, level, -+ &sched_rq_queued_masks[0], -+ &sched_rq_queued_masks_bitmap[0])) -+ return; -+ -+#ifdef CONFIG_SCHED_SMT -+ if (cpu == per_cpu(sched_sibling_cpu, cpu)) -+ return; -+ -+ if (SCHED_RQ_EMPTY == last_level) { -+ cpumask_andnot(&sched_cpu_sg_idle_mask, &sched_cpu_sg_idle_mask, -+ cpu_smt_mask(cpu)); -+ } else if (SCHED_RQ_EMPTY == level) { -+ cpumask_t tmp; -+ -+ cpumask_and(&tmp, cpu_smt_mask(cpu), -+ &sched_rq_queued_masks[SCHED_RQ_EMPTY]); -+ if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) -+ cpumask_or(&sched_cpu_sg_idle_mask, cpu_smt_mask(cpu), -+ &sched_cpu_sg_idle_mask); -+ } -+ -+#ifdef CONFIG_SMT_NICE -+ if (level <= SCHED_RQ_IDLE && last_level > SCHED_RQ_IDLE) { -+ cpumask_clear_cpu(per_cpu(sched_sibling_cpu, cpu), -+ &sched_smt_supressed_mask); -+ update_sched_cpu_psg_mask(cpu); -+ resched_cpu_if_curr_is(per_cpu(sched_sibling_cpu, cpu), PRIO_LIMIT); -+ } else if (last_level <= SCHED_RQ_IDLE && level > SCHED_RQ_IDLE) { -+ cpumask_set_cpu(per_cpu(sched_sibling_cpu, cpu), -+ &sched_smt_supressed_mask); -+ update_sched_cpu_psg_mask(cpu); -+ resched_cpu_if_curr_is(per_cpu(sched_sibling_cpu, cpu), IDLE_PRIO); -+ } -+#endif /* CONFIG_SMT_NICE */ -+#endif -+} -+ -+static inline void update_sched_rq_pending_masks(struct rq *rq) -+{ -+ unsigned long level; -+ struct task_struct *p = rq_second_queued_task(rq); -+ -+ level = task_running_policy_level(p, rq); -+ -+ __update_cpumasks_bitmap(cpu_of(rq), &rq->pending_level, level, -+ &sched_rq_pending_masks[0], 
-+ &sched_rq_pending_masks_bitmap[0]); -+} -+ -+#else /* CONFIG_SMP */ -+static inline void update_sched_rq_queued_masks(struct rq *rq) {} -+static inline void update_sched_rq_queued_masks_normal(struct rq *rq) {} -+static inline void update_sched_rq_pending_masks(struct rq *rq) {} -+#endif -+ -+#ifdef CONFIG_NO_HZ_FULL -+/* -+ * Tick may be needed by tasks in the runqueue depending on their policy and -+ * requirements. If tick is needed, lets send the target an IPI to kick it out -+ * of nohz mode if necessary. -+ */ -+static inline void sched_update_tick_dependency(struct rq *rq) -+{ -+ int cpu; -+ -+ if (!tick_nohz_full_enabled()) -+ return; -+ -+ cpu = cpu_of(rq); -+ -+ if (!tick_nohz_full_cpu(cpu)) -+ return; -+ -+ if (rq->nr_running < 2) -+ tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); -+ else -+ tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); -+} -+#else /* !CONFIG_NO_HZ_FULL */ -+static inline void sched_update_tick_dependency(struct rq *rq) { } -+#endif -+ -+/* -+ * Removing from the runqueue. Deleting a task from the skip list is done -+ * via the stored node reference in the task struct and does not require a full -+ * look up. Thus it occurs in O(k) time where k is the "level" of the list the -+ * task was stored at - usually < 4, max 16. ++/** ++ * wake_q_add() - queue a wakeup for 'later' waking. ++ * @head: the wake_q_head to add @task to ++ * @task: the task to queue for 'later' wakeup + * -+ * Context: rq->lock -+ */ -+static inline void dequeue_task(struct task_struct *p, struct rq *rq, int flags) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ WARN_ONCE(task_rq(p) != rq, "pds: dequeue task reside on cpu%d from cpu%d\n", -+ task_cpu(p), cpu_of(rq)); -+ if (skiplist_del_init(&rq->sl_header, &p->sl_node)) { -+ update_sched_rq_queued_masks(rq); -+ update_sched_rq_pending_masks(rq); -+ } else if (is_second_in_rq(p, rq)) -+ update_sched_rq_pending_masks(rq); -+ rq->nr_running--; -+ -+ sched_update_tick_dependency(rq); -+ psi_dequeue(p, flags & DEQUEUE_SLEEP); -+ -+ sched_info_dequeued(rq, p); -+} -+ -+/* -+ * To determine if it's safe for a task of SCHED_IDLE to actually run as -+ * an idle task, we ensure none of the following conditions are met. -+ */ -+static inline bool idleprio_suitable(struct task_struct *p) -+{ -+ return (!freezing(p) && !signal_pending(p) && -+ !(task_contributes_to_load(p)) && !(p->flags & (PF_EXITING))); -+} -+ -+/* -+ * pds_skiplist_random_level -- Returns a pseudo-random level number for skip -+ * list node which is used in PDS run queue. ++ * Queue a task for later wakeup, most likely by the wake_up_q() call in the ++ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come ++ * instantly. + * -+ * In current implementation, based on testing, the first 8 bits in microseconds -+ * of niffies are suitable for random level population. -+ * find_first_bit() is used to satisfy p = 0.5 between each levels, and there -+ * should be platform hardware supported instruction(known as ctz/clz) to speed -+ * up this function. -+ * The skiplist level for a task is populated when task is created and doesn't -+ * change in task's life time. When task is being inserted into run queue, this -+ * skiplist level is set to task's sl_node->level, the skiplist insert function -+ * may change it based on current level of the skip lsit. ++ * This function must be used as-if it were wake_up_process(); IOW the task ++ * must be ready to be woken at this location. 
+ */ -+static inline int pds_skiplist_random_level(const struct task_struct *p) ++void wake_q_add(struct wake_q_head *head, struct task_struct *task) +{ -+ long unsigned int randseed; -+ -+ /* -+ * 1. Some architectures don't have better than microsecond resolution -+ * so mask out ~microseconds as a factor of the random seed for skiplist -+ * insertion. -+ * 2. Use address of task structure pointer as another factor of the -+ * random seed for task burst forking scenario. -+ */ -+ randseed = (task_rq(p)->clock ^ (long unsigned int)p) >> 10; -+ -+ return find_first_bit(&randseed, NUM_SKIPLIST_LEVEL - 1); ++ if (__wake_q_add(head, task)) ++ get_task_struct(task); +} + +/** -+ * pds_skiplist_task_search -- search function used in PDS run queue skip list -+ * node insert operation. -+ * @it: iterator pointer to the node in the skip list -+ * @node: pointer to the skiplist_node to be inserted ++ * wake_q_add_safe() - safely queue a wakeup for 'later' waking. ++ * @head: the wake_q_head to add @task to ++ * @task: the task to queue for 'later' wakeup + * -+ * Returns true if key of @it is less or equal to key value of @node, otherwise -+ * false. ++ * Queue a task for later wakeup, most likely by the wake_up_q() call in the ++ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come ++ * instantly. ++ * ++ * This function must be used as-if it were wake_up_process(); IOW the task ++ * must be ready to be woken at this location. ++ * ++ * This function is essentially a task-safe equivalent to wake_q_add(). Callers ++ * that already hold reference to @task can call the 'safe' version and trust ++ * wake_q to do the right thing depending whether or not the @task is already ++ * queued for wakeup. + */ -+static inline bool -+pds_skiplist_task_search(struct skiplist_node *it, struct skiplist_node *node) ++void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) +{ -+ return (skiplist_entry(it, struct task_struct, sl_node)->priodl <= -+ skiplist_entry(node, struct task_struct, sl_node)->priodl); ++ if (!__wake_q_add(head, task)) ++ put_task_struct(task); +} + -+/* -+ * Define the skip list insert function for PDS -+ */ -+DEFINE_SKIPLIST_INSERT_FUNC(pds_skiplist_insert, pds_skiplist_task_search); -+ -+/* -+ * Adding task to the runqueue. -+ * -+ * Context: rq->lock -+ */ -+static inline void enqueue_task(struct task_struct *p, struct rq *rq, int flags) ++void wake_up_q(struct wake_q_head *head) +{ -+ lockdep_assert_held(&rq->lock); ++ struct wake_q_node *node = head->first; + -+ WARN_ONCE(task_rq(p) != rq, "pds: enqueue task reside on cpu%d to cpu%d\n", -+ task_cpu(p), cpu_of(rq)); ++ while (node != WAKE_Q_TAIL) { ++ struct task_struct *task; + -+ p->sl_node.level = p->sl_level; -+ if (pds_skiplist_insert(&rq->sl_header, &p->sl_node)) { -+ update_sched_rq_queued_masks(rq); -+ update_sched_rq_pending_masks(rq); -+ } else if (is_second_in_rq(p, rq)) -+ update_sched_rq_pending_masks(rq); -+ rq->nr_running++; ++ task = container_of(node, struct task_struct, wake_q); ++ BUG_ON(!task); ++ /* task can safely be re-inserted now: */ ++ node = node->next; ++ task->wake_q.next = NULL; + -+ sched_update_tick_dependency(rq); -+ -+ sched_info_queued(rq, p); -+ psi_enqueue(p, flags); -+ -+ /* -+ * If in_iowait is set, the code below may not trigger any cpufreq -+ * utilization updates, so do it here explicitly with the IOWAIT flag -+ * passed. 
-+ */ -+ if (p->in_iowait) -+ cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_IOWAIT); -+} -+ -+static inline void requeue_task(struct task_struct *p, struct rq *rq) -+{ -+ bool b_first, b_second; -+ -+ lockdep_assert_held(&rq->lock); -+ -+ WARN_ONCE(task_rq(p) != rq, "pds: cpu[%d] requeue task reside on cpu%d\n", -+ cpu_of(rq), task_cpu(p)); -+ -+ b_first = skiplist_del_init(&rq->sl_header, &p->sl_node); -+ b_second = is_second_in_rq(p, rq); -+ -+ p->sl_node.level = p->sl_level; -+ if (pds_skiplist_insert(&rq->sl_header, &p->sl_node) || b_first) { -+ update_sched_rq_queued_masks(rq); -+ update_sched_rq_pending_masks(rq); -+ } else if (is_second_in_rq(p, rq) || b_second) -+ update_sched_rq_pending_masks(rq); ++ /* ++ * wake_up_process() executes a full barrier, which pairs with ++ * the queueing in wake_q_add() so as not to miss wakeups. ++ */ ++ wake_up_process(task); ++ put_task_struct(task); ++ } +} + +/* @@ -1996,20 +1588,142 @@ index 000000000000..02d7d5a67c77 + trace_sched_wake_idle_without_ipi(cpu); +} + -+static inline void check_preempt_curr(struct rq *rq, struct task_struct *p) ++void resched_cpu(int cpu) +{ -+ struct task_struct *curr = rq->curr; ++ struct rq *rq = cpu_rq(cpu); ++ unsigned long flags; + -+ if (curr->prio == PRIO_LIMIT) -+ resched_curr(rq); ++ raw_spin_lock_irqsave(&rq->lock, flags); ++ if (cpu_online(cpu) || cpu == smp_processor_id()) ++ resched_curr(cpu_rq(cpu)); ++ raw_spin_unlock_irqrestore(&rq->lock, flags); ++} + -+ if (task_running_idle(p)) ++#ifdef CONFIG_SMP ++#ifdef CONFIG_NO_HZ_COMMON ++void nohz_balance_enter_idle(int cpu) {} ++ ++void select_nohz_load_balancer(int stop_tick) {} ++ ++void set_cpu_sd_state_idle(void) {} ++ ++/* ++ * In the semi idle case, use the nearest busy CPU for migrating timers ++ * from an idle CPU. This is good for power-savings. ++ * ++ * We don't do similar optimization for completely idle system, as ++ * selecting an idle CPU will add more delays to the timers than intended ++ * (as that CPU's timer base may not be uptodate wrt jiffies etc). ++ */ ++int get_nohz_timer_target(void) ++{ ++ int i, cpu = smp_processor_id(), default_cpu = -1; ++ struct cpumask *mask; ++ ++ if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) { ++ if (!idle_cpu(cpu)) ++ return cpu; ++ default_cpu = cpu; ++ } ++ ++ for (mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); ++ mask < per_cpu(sched_cpu_affinity_end_mask, cpu); mask++) ++ for_each_cpu_and(i, mask, housekeeping_cpumask(HK_FLAG_TIMER)) ++ if (!idle_cpu(i)) ++ return i; ++ ++ if (default_cpu == -1) ++ default_cpu = housekeeping_any_cpu(HK_FLAG_TIMER); ++ cpu = default_cpu; ++ ++ return cpu; ++} ++ ++/* ++ * When add_timer_on() enqueues a timer into the timer wheel of an ++ * idle CPU then this timer might expire before the next timer event ++ * which is scheduled to wake up that CPU. In case of a completely ++ * idle system the next event might even be infinite time into the ++ * future. wake_up_idle_cpu() ensures that the CPU is woken up and ++ * leaves the inner idle loop so the newly added timer is taken into ++ * account when the CPU goes back to idle and evaluates the timer ++ * wheel for the next timer event. 
++ */ ++static inline void wake_up_idle_cpu(int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ ++ if (cpu == smp_processor_id()) + return; + -+ if (p->priodl < curr->priodl) ++ if (set_nr_and_not_polling(rq->idle)) ++ smp_send_reschedule(cpu); ++ else ++ trace_sched_wake_idle_without_ipi(cpu); ++} ++ ++static inline bool wake_up_full_nohz_cpu(int cpu) ++{ ++ /* ++ * We just need the target to call irq_exit() and re-evaluate ++ * the next tick. The nohz full kick at least implies that. ++ * If needed we can still optimize that later with an ++ * empty IRQ. ++ */ ++ if (cpu_is_offline(cpu)) ++ return true; /* Don't try to wake offline CPUs. */ ++ if (tick_nohz_full_cpu(cpu)) { ++ if (cpu != smp_processor_id() || ++ tick_nohz_tick_stopped()) ++ tick_nohz_full_kick_cpu(cpu); ++ return true; ++ } ++ ++ return false; ++} ++ ++void wake_up_nohz_cpu(int cpu) ++{ ++ if (!wake_up_full_nohz_cpu(cpu)) ++ wake_up_idle_cpu(cpu); ++} ++ ++static void nohz_csd_func(void *info) ++{ ++ struct rq *rq = info; ++ int cpu = cpu_of(rq); ++ unsigned int flags; ++ ++ /* ++ * Release the rq::nohz_csd. ++ */ ++ flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); ++ WARN_ON(!(flags & NOHZ_KICK_MASK)); ++ ++ rq->idle_balance = idle_cpu(cpu); ++ if (rq->idle_balance && !need_resched()) { ++ rq->nohz_idle_balance = flags; ++ raise_softirq_irqoff(SCHED_SOFTIRQ); ++ } ++} ++ ++#endif /* CONFIG_NO_HZ_COMMON */ ++#endif /* CONFIG_SMP */ ++ ++static inline void check_preempt_curr(struct rq *rq) ++{ ++ if (sched_rq_first_task(rq) != rq->curr) + resched_curr(rq); +} + ++static inline void ++rq_csd_init(struct rq *rq, call_single_data_t *csd, smp_call_func_t func) ++{ ++ csd->flags = 0; ++ csd->func = func; ++ csd->info = rq; ++} ++ +#ifdef CONFIG_SCHED_HRTICK +/* + * Use HR-timers to deliver accurate preemption points. @@ -2049,7 +1763,7 @@ index 000000000000..02d7d5a67c77 +static inline int hrtick_enabled(struct rq *rq) +{ + /** -+ * PDS doesn't support sched_feat yet ++ * Alt schedule FW doesn't support sched_feat yet + if (!sched_feat(HRTICK)) + return 0; + */ @@ -2126,23 +1840,12 @@ index 000000000000..02d7d5a67c77 +static void hrtick_rq_init(struct rq *rq) +{ +#ifdef CONFIG_SMP -+ rq->hrtick_csd.flags = 0; -+ rq->hrtick_csd.func = __hrtick_start; -+ rq->hrtick_csd.info = rq; ++ rq_csd_init(rq, &rq->hrtick_csd, __hrtick_start); +#endif + + hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); + rq->hrtick_timer.function = hrtick; +} -+ -+static inline int rq_dither(struct rq *rq) -+{ -+ if ((rq->clock - rq->last_tick > HALF_JIFFY_NS) || hrtick_enabled(rq)) -+ return 0; -+ -+ return HALF_JIFFY_NS; -+} -+ +#else /* CONFIG_SCHED_HRTICK */ +static inline int hrtick_enabled(struct rq *rq) +{ @@ -2156,27 +1859,14 @@ index 000000000000..02d7d5a67c77 +static inline void hrtick_rq_init(struct rq *rq) +{ +} -+ -+static inline int rq_dither(struct rq *rq) -+{ -+ return (rq->clock - rq->last_tick > HALF_JIFFY_NS)? 
0:HALF_JIFFY_NS; -+} +#endif /* CONFIG_SCHED_HRTICK */ + +static inline int normal_prio(struct task_struct *p) +{ -+ static const int policy_to_prio[] = { -+ NORMAL_PRIO, /* SCHED_NORMAL */ -+ 0, /* SCHED_FIFO */ -+ 0, /* SCHED_RR */ -+ IDLE_PRIO, /* SCHED_BATCH */ -+ ISO_PRIO, /* SCHED_ISO */ -+ IDLE_PRIO /* SCHED_IDLE */ -+ }; -+ + if (task_has_rt_policy(p)) + return MAX_RT_PRIO - 1 - p->rt_priority; -+ return policy_to_prio[p->policy]; ++ ++ return p->static_prio + MAX_PRIORITY_ADJ; +} + +/* @@ -2205,11 +1895,9 @@ index 000000000000..02d7d5a67c77 + */ +static void activate_task(struct task_struct *p, struct rq *rq) +{ -+ if (task_contributes_to_load(p)) -+ rq->nr_uninterruptible--; + enqueue_task(p, rq, ENQUEUE_WAKEUP); -+ p->on_rq = 1; -+ cpufreq_update_this_cpu(rq, 0); ++ p->on_rq = TASK_ON_RQ_QUEUED; ++ cpufreq_update_util(rq, 0); +} + +/* @@ -2219,11 +1907,9 @@ index 000000000000..02d7d5a67c77 + */ +static inline void deactivate_task(struct task_struct *p, struct rq *rq) +{ -+ if (task_contributes_to_load(p)) -+ rq->nr_uninterruptible++; + dequeue_task(p, rq, DEQUEUE_SLEEP); + p->on_rq = 0; -+ cpufreq_update_this_cpu(rq, 0); ++ cpufreq_update_util(rq, 0); +} + +static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) @@ -2290,7 +1976,7 @@ index 000000000000..02d7d5a67c77 + */ +static inline bool is_cpu_allowed(struct task_struct *p, int cpu) +{ -+ if (!cpumask_test_cpu(cpu, &p->cpus_mask)) ++ if (!cpumask_test_cpu(cpu, p->cpus_ptr)) + return false; + + if (is_per_cpu_kthread(p)) @@ -2323,7 +2009,7 @@ index 000000000000..02d7d5a67c77 +{ + lockdep_assert_held(&rq->lock); + -+ p->on_rq = TASK_ON_RQ_MIGRATING; ++ WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING); + dequeue_task(p, rq, 0); + set_task_cpu(p, new_cpu); + raw_spin_unlock(&rq->lock); @@ -2334,7 +2020,7 @@ index 000000000000..02d7d5a67c77 + BUG_ON(task_cpu(p) != new_cpu); + enqueue_task(p, rq, 0); + p->on_rq = TASK_ON_RQ_QUEUED; -+ check_preempt_curr(rq, p); ++ check_preempt_curr(rq); + + return rq; +} @@ -2380,6 +2066,12 @@ index 000000000000..02d7d5a67c77 + * be on another CPU but it doesn't matter. + */ + local_irq_disable(); ++ /* ++ * We need to explicitly wake pending tasks before running ++ * __migrate_task() such that we will not miss enforcing cpus_ptr ++ * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. ++ */ ++ flush_smp_call_function_from_idle(); + + raw_spin_lock(&p->pi_lock); + raw_spin_lock(&rq->lock); @@ -2388,9 +2080,8 @@ index 000000000000..02d7d5a67c77 + * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because + * we're holding p->pi_lock. + */ -+ if (task_rq(p) == rq) -+ if (task_on_rq_queued(p)) -+ rq = __migrate_task(rq, p, arg->dest_cpu); ++ if (task_rq(p) == rq && task_on_rq_queued(p)) ++ rq = __migrate_task(rq, p, arg->dest_cpu); + raw_spin_unlock(&rq->lock); + raw_spin_unlock(&p->pi_lock); + @@ -2411,13 +2102,6 @@ index 000000000000..02d7d5a67c77 +} +#endif + -+/* Enter with rq lock held. We know p is on the local CPU */ -+static inline void __set_tsk_resched(struct task_struct *p) -+{ -+ set_tsk_need_resched(p); -+ set_preempt_need_resched(); -+} -+ +/** + * task_curr - is this task currently executing on a CPU? + * @p: the task in question. 
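normal_prio() above maps RT tasks to MAX_RT_PRIO - 1 - rt_priority and everything else to static_prio + MAX_PRIORITY_ADJ. A stand-alone sketch of that mapping using the standard kernel priority scale; MAX_PRIORITY_ADJ is defined elsewhere in the patch, and the value 4 below is only a placeholder for illustration.

#include <stdio.h>

/* Standard kernel priority scale. */
#define MAX_RT_PRIO	100
#define DEFAULT_PRIO	(MAX_RT_PRIO + 20)
#define NICE_TO_PRIO(n)	(DEFAULT_PRIO + (n))

/* Placeholder only; the real value comes from the BMQ/PDS headers. */
#define MAX_PRIORITY_ADJ	4

struct task {
	int static_prio;	/* NICE_TO_PRIO(nice) for normal tasks */
	int rt_priority;	/* 1..99 for RT tasks, 0 otherwise */
};

static int normal_prio(const struct task *p)
{
	if (p->rt_priority)	/* stand-in for task_has_rt_policy(p) */
		return MAX_RT_PRIO - 1 - p->rt_priority;
	return p->static_prio + MAX_PRIORITY_ADJ;
}

int main(void)
{
	struct task nice0 = { .static_prio = NICE_TO_PRIO(0) };
	struct task nice19 = { .static_prio = NICE_TO_PRIO(19) };
	struct task rt50 = { .rt_priority = 50 };

	printf("nice 0  -> prio %d\n", normal_prio(&nice0));	/* 120 + ADJ */
	printf("nice 19 -> prio %d\n", normal_prio(&nice19));	/* 139 + ADJ */
	printf("RT 50   -> prio %d\n", normal_prio(&rt50));	/* 49 */
	return 0;
}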
@@ -2559,7 +2243,7 @@ index 000000000000..02d7d5a67c77 +EXPORT_SYMBOL_GPL(kick_process); + +/* -+ * ->cpus_mask is protected by both rq->lock and p->pi_lock ++ * ->cpus_ptr is protected by both rq->lock and p->pi_lock + * + * A few notes on cpu_active vs cpu_online: + * @@ -2599,14 +2283,14 @@ index 000000000000..02d7d5a67c77 + for_each_cpu(dest_cpu, nodemask) { + if (!cpu_active(dest_cpu)) + continue; -+ if (cpumask_test_cpu(dest_cpu, &p->cpus_mask)) ++ if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) + return dest_cpu; + } + } + + for (;;) { + /* Any allowed, online CPU? */ -+ for_each_cpu(dest_cpu, &p->cpus_mask) { ++ for_each_cpu(dest_cpu, p->cpus_ptr) { + if (!is_cpu_allowed(p, dest_cpu)) + continue; + goto out; @@ -2620,7 +2304,7 @@ index 000000000000..02d7d5a67c77 + state = possible; + break; + } -+ /* Fall-through */ ++ fallthrough; + case possible: + do_set_cpus_allowed(p, cpu_possible_mask); + state = fail; @@ -2648,119 +2332,163 @@ index 000000000000..02d7d5a67c77 + return dest_cpu; +} + -+static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask) ++static inline int select_task_rq(struct task_struct *p, struct rq *rq) +{ -+ cpumask_t *mask; ++ cpumask_t chk_mask, tmp; + -+ if (cpumask_test_cpu(cpu, cpumask)) -+ return cpu; ++ if (unlikely(!cpumask_and(&chk_mask, p->cpus_ptr, cpu_online_mask))) ++ return select_fallback_rq(task_cpu(p), p); + -+ mask = &(per_cpu(sched_cpu_affinity_chk_masks, cpu)[0]); -+ while ((cpu = cpumask_any_and(cpumask, mask)) >= nr_cpu_ids) -+ mask++; ++ if ( ++#ifdef CONFIG_SCHED_SMT ++ cpumask_and(&tmp, &chk_mask, &sched_sg_idle_mask) || ++#endif ++ cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[IDLE_WM]) || ++ cpumask_and(&tmp, &chk_mask, ++ &sched_rq_watermark[task_sched_prio(p, rq) + 1])) ++ return best_mask_cpu(task_cpu(p), &tmp); + -+ return cpu; ++ return best_mask_cpu(task_cpu(p), &chk_mask); ++} ++ ++void sched_set_stop_task(int cpu, struct task_struct *stop) ++{ ++ struct sched_param stop_param = { .sched_priority = STOP_PRIO }; ++ struct sched_param start_param = { .sched_priority = 0 }; ++ struct task_struct *old_stop = cpu_rq(cpu)->stop; ++ ++ if (stop) { ++ /* ++ * Make it appear like a SCHED_FIFO task, its something ++ * userspace knows about and won't get confused about. ++ * ++ * Also, it will make PI more or less work without too ++ * much confusion -- but then, stop work should not ++ * rely on PI working anyway. ++ */ ++ sched_setscheduler_nocheck(stop, SCHED_FIFO, &stop_param); ++ } ++ ++ cpu_rq(cpu)->stop = stop; ++ ++ if (old_stop) { ++ /* ++ * Reset it back to a normal scheduling policy so that ++ * it can die in pieces. ++ */ ++ sched_setscheduler_nocheck(old_stop, SCHED_NORMAL, &start_param); ++ } +} + +/* -+ * task_preemptible_rq - return the rq which the given task can preempt on -+ * @p: task wants to preempt CPU -+ * @only_preempt_low_policy: indicate only preempt rq running low policy than @p ++ * Change a given task's CPU affinity. Migrate the thread to a ++ * proper CPU and schedule it away if the CPU it's executing on ++ * is removed from the allowed bitmask. ++ * ++ * NOTE: the caller must have a valid reference to the task, the ++ * task must not exit() & deallocate itself prematurely. The ++ * call is not atomic; no spinlocks may be held. 
+ */ -+static inline int -+task_preemptible_rq_idle(struct task_struct *p, cpumask_t *chk_mask) ++static int __set_cpus_allowed_ptr(struct task_struct *p, ++ const struct cpumask *new_mask, bool check) +{ -+ cpumask_t tmp; ++ const struct cpumask *cpu_valid_mask = cpu_active_mask; ++ int dest_cpu; ++ unsigned long flags; ++ struct rq *rq; ++ raw_spinlock_t *lock; ++ int ret = 0; + -+#ifdef CONFIG_SCHED_SMT -+ if (cpumask_and(&tmp, chk_mask, &sched_cpu_sg_idle_mask)) -+ return best_mask_cpu(task_cpu(p), &tmp); -+#endif ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ rq = __task_access_lock(p, &lock); + -+#ifdef CONFIG_SMT_NICE -+ /* Only ttwu on cpu which is not smt supressed */ -+ if (cpumask_andnot(&tmp, chk_mask, &sched_smt_supressed_mask)) { -+ cpumask_t t; -+ if (cpumask_and(&t, &tmp, &sched_rq_queued_masks[SCHED_RQ_EMPTY])) -+ return best_mask_cpu(task_cpu(p), &t); -+ return best_mask_cpu(task_cpu(p), &tmp); ++ if (p->flags & PF_KTHREAD) { ++ /* ++ * Kernel threads are allowed on online && !active CPUs ++ */ ++ cpu_valid_mask = cpu_online_mask; + } -+#endif + -+ if (cpumask_and(&tmp, chk_mask, &sched_rq_queued_masks[SCHED_RQ_EMPTY])) -+ return best_mask_cpu(task_cpu(p), &tmp); -+ return best_mask_cpu(task_cpu(p), chk_mask); ++ /* ++ * Must re-check here, to close a race against __kthread_bind(), ++ * sched_setaffinity() is not guaranteed to observe the flag. ++ */ ++ if (check && (p->flags & PF_NO_SETAFFINITY)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (cpumask_equal(&p->cpus_mask, new_mask)) ++ goto out; ++ ++ dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); ++ if (dest_cpu >= nr_cpu_ids) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ do_set_cpus_allowed(p, new_mask); ++ ++ if (p->flags & PF_KTHREAD) { ++ /* ++ * For kernel threads that do indeed end up on online && ++ * !active we want to ensure they are strict per-CPU threads. ++ */ ++ WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && ++ !cpumask_intersects(new_mask, cpu_active_mask) && ++ p->nr_cpus_allowed != 1); ++ } ++ ++ /* Can the task run on the task's current CPU? If so, we're done */ ++ if (cpumask_test_cpu(task_cpu(p), new_mask)) ++ goto out; ++ ++ if (task_running(p) || p->state == TASK_WAKING) { ++ struct migration_arg arg = { p, dest_cpu }; ++ ++ /* Need help from migration thread: drop lock and wait. */ ++ __task_access_unlock(p, lock); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); ++ return 0; ++ } ++ if (task_on_rq_queued(p)) { ++ /* ++ * OK, since we're going to drop the lock immediately ++ * afterwards anyway. 
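`__set_cpus_allowed_ptr()` above is the code path that ultimately services `sched_setaffinity(2)`, so its effect is easy to observe from userspace. A small, standard-glibc example (nothing patch-specific) that pins the calling thread to CPU 0:

```c
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
    cpu_set_t set;

    CPU_ZERO(&set);
    CPU_SET(0, &set);                  /* allow CPU 0 only */

    /* pid 0 == calling thread; the kernel ends up in __set_cpus_allowed_ptr() */
    if (sched_setaffinity(0, sizeof(set), &set) != 0) {
        perror("sched_setaffinity");
        return 1;
    }

    printf("now pinned, running on CPU %d\n", sched_getcpu());
    return 0;
}
```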
++ */ ++ update_rq_clock(rq); ++ rq = move_queued_task(rq, p, dest_cpu); ++ lock = &rq->lock; ++ } ++ ++out: ++ __task_access_unlock(p, lock); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ ++ return ret; +} + -+static inline int -+task_preemptible_rq(struct task_struct *p, cpumask_t *chk_mask, -+ int preempt_level) ++int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) +{ -+ cpumask_t tmp; -+ int level; -+ -+#ifdef CONFIG_SCHED_SMT -+#ifdef CONFIG_SMT_NICE -+ if (cpumask_and(&tmp, chk_mask, &sched_cpu_psg_mask)) -+ return best_mask_cpu(task_cpu(p), &tmp); -+#else -+ if (cpumask_and(&tmp, chk_mask, &sched_cpu_sg_idle_mask)) -+ return best_mask_cpu(task_cpu(p), &tmp); -+#endif -+#endif -+ -+ level = find_first_bit(sched_rq_queued_masks_bitmap, -+ NR_SCHED_RQ_QUEUED_LEVEL); -+ -+ while (level < preempt_level) { -+ if (cpumask_and(&tmp, chk_mask, &sched_rq_queued_masks[level])) -+ return best_mask_cpu(task_cpu(p), &tmp); -+ -+ level = find_next_bit(sched_rq_queued_masks_bitmap, -+ NR_SCHED_RQ_QUEUED_LEVEL, -+ level + 1); -+ } -+ -+ if (unlikely(SCHED_RQ_RT == level && -+ level == preempt_level && -+ cpumask_and(&tmp, chk_mask, -+ &sched_rq_queued_masks[SCHED_RQ_RT]))) { -+ unsigned int cpu; -+ -+ for_each_cpu (cpu, &tmp) -+ if (p->prio < sched_rq_prio[cpu]) -+ return cpu; -+ } -+ -+ return best_mask_cpu(task_cpu(p), chk_mask); ++ return __set_cpus_allowed_ptr(p, new_mask, false); +} ++EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); + -+static inline int select_task_rq(struct task_struct *p) -+{ -+ cpumask_t chk_mask; -+ -+ if (unlikely(!cpumask_and(&chk_mask, &p->cpus_mask, cpu_online_mask))) -+ return select_fallback_rq(task_cpu(p), p); -+ -+ /* Check IDLE tasks suitable to run normal priority */ -+ if (idleprio_task(p)) { -+ if (idleprio_suitable(p)) { -+ p->prio = p->normal_prio; -+ update_task_priodl(p); -+ return task_preemptible_rq_idle(p, &chk_mask); -+ } -+ p->prio = NORMAL_PRIO; -+ update_task_priodl(p); -+ } -+ -+ return task_preemptible_rq(p, &chk_mask, -+ task_running_policy_level(p, this_rq())); -+} +#else /* CONFIG_SMP */ -+static inline int select_task_rq(struct task_struct *p) ++ ++static inline int select_task_rq(struct task_struct *p, struct rq *rq) +{ + return 0; +} ++ ++static inline int ++__set_cpus_allowed_ptr(struct task_struct *p, ++ const struct cpumask *new_mask, bool check) ++{ ++ return set_cpus_allowed_ptr(p, new_mask); ++} ++ +#endif /* CONFIG_SMP */ + +static void @@ -2777,7 +2505,7 @@ index 000000000000..02d7d5a67c77 + if (cpu == rq->cpu) + __schedstat_inc(rq->ttwu_local); + else { -+ /** PDS ToDo: ++ /** Alt schedule FW ToDo: + * How to do ttwu_wake_remote + */ + } @@ -2792,6 +2520,7 @@ index 000000000000..02d7d5a67c77 +static inline void +ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) +{ ++ check_preempt_curr(rq); + p->state = TASK_RUNNING; + trace_sched_wakeup(p); +} @@ -2799,16 +2528,39 @@ index 000000000000..02d7d5a67c77 +static inline void +ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) +{ -+#ifdef CONFIG_SMP + if (p->sched_contributes_to_load) + rq->nr_uninterruptible--; -+#endif + + activate_task(p, rq); + ttwu_do_wakeup(rq, p, 0); +} + -+static int ttwu_remote(struct task_struct *p, int wake_flags) ++/* ++ * Consider @p being inside a wait loop: ++ * ++ * for (;;) { ++ * set_current_state(TASK_UNINTERRUPTIBLE); ++ * ++ * if (CONDITION) ++ * break; ++ * ++ * schedule(); ++ * } ++ * __set_current_state(TASK_RUNNING); ++ * ++ * between set_current_state() and schedule(). 
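The comment block above describes the canonical wait loop that `ttwu_runnable()` races against. Below is a minimal kernel-module-style sketch of that pattern using the stock kthread/wakeup API; it is not part of this patch, the module and function names are invented, and `module_exit()` is deliberately omitted so the demo thread can simply exit on its own (the module is then not unloadable, which keeps the sketch short):

```c
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/err.h>

static struct task_struct *waiter;
static int condition;

static int waiter_fn(void *unused)
{
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);  /* publish "about to sleep" first */
		if (condition)                          /* then re-check the condition */
			break;
		schedule();                             /* really sleep; woken by try_to_wake_up() */
	}
	__set_current_state(TASK_RUNNING);
	pr_info("wake_demo: condition observed\n");
	return 0;                                       /* standalone kthread, never kthread_stop()ed */
}

static int __init wake_demo_init(void)
{
	waiter = kthread_run(waiter_fn, NULL, "wake-demo");
	if (IS_ERR(waiter))
		return PTR_ERR(waiter);

	msleep(100);               /* let the waiter reach schedule() */
	condition = 1;             /* CONDITION = 1 ...                                   */
	wake_up_process(waiter);   /* ... then try_to_wake_up(); it orders the store above */
	return 0;
}

module_init(wake_demo_init);
/* no module_exit(): intentionally non-unloadable to keep the sketch minimal */
MODULE_LICENSE("GPL");
```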
In this case @p is still ++ * runnable, so all that needs doing is change p->state back to TASK_RUNNING in ++ * an atomic manner. ++ * ++ * By taking task_rq(p)->lock we serialize against schedule(), if @p->on_rq ++ * then schedule() must still happen and p->state can be changed to ++ * TASK_RUNNING. Otherwise we lost the race, schedule() has happened, and we ++ * need to do a full wakeup with enqueue. ++ * ++ * Returns: %true when the wakeup is done, ++ * %false otherwise. ++ */ ++static int ttwu_runnable(struct task_struct *p, int wake_flags) +{ + struct rq *rq; + raw_spinlock_t *lock; @@ -2816,6 +2568,8 @@ index 000000000000..02d7d5a67c77 + + rq = __task_access_lock(p, &lock); + if (task_on_rq_queued(p)) { ++ /* check_preempt_curr() may use rq clock */ ++ update_rq_clock(rq); + ttwu_do_wakeup(rq, p, wake_flags); + ret = 1; + } @@ -2824,6 +2578,151 @@ index 000000000000..02d7d5a67c77 + return ret; +} + ++#ifdef CONFIG_SMP ++void sched_ttwu_pending(void *arg) ++{ ++ struct llist_node *llist = arg; ++ struct rq *rq = this_rq(); ++ struct task_struct *p, *t; ++ struct rq_flags rf; ++ ++ if (!llist) ++ return; ++ ++ /* ++ * rq::ttwu_pending racy indication of out-standing wakeups. ++ * Races such that false-negatives are possible, since they ++ * are shorter lived that false-positives would be. ++ */ ++ WRITE_ONCE(rq->ttwu_pending, 0); ++ ++ rq_lock_irqsave(rq, &rf); ++ update_rq_clock(rq); ++ ++ llist_for_each_entry_safe(p, t, llist, wake_entry.llist) { ++ if (WARN_ON_ONCE(p->on_cpu)) ++ smp_cond_load_acquire(&p->on_cpu, !VAL); ++ ++ if (WARN_ON_ONCE(task_cpu(p) != cpu_of(rq))) ++ set_task_cpu(p, cpu_of(rq)); ++ ++ ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0); ++ } ++ ++ rq_unlock_irqrestore(rq, &rf); ++} ++ ++void send_call_function_single_ipi(int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ ++ if (!set_nr_if_polling(rq->idle)) ++ arch_send_call_function_single_ipi(cpu); ++ else ++ trace_sched_wake_idle_without_ipi(cpu); ++} ++ ++/* ++ * Queue a task on the target CPUs wake_list and wake the CPU via IPI if ++ * necessary. The wakee CPU on receipt of the IPI will queue the task ++ * via sched_ttwu_wakeup() for activation so the wakee incurs the cost ++ * of the wakeup instead of the waker. ++ */ ++static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ ++ p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED); ++ ++ WRITE_ONCE(rq->ttwu_pending, 1); ++ __smp_call_single_queue(cpu, &p->wake_entry.llist); ++} ++ ++static inline bool ttwu_queue_cond(int cpu, int wake_flags) ++{ ++ /* ++ * If the CPU does not share cache, then queue the task on the ++ * remote rqs wakelist to avoid accessing remote data. ++ */ ++ if (!cpus_share_cache(smp_processor_id(), cpu)) ++ return true; ++ ++ /* ++ * If the task is descheduling and the only running task on the ++ * CPU then use the wakelist to offload the task activation to ++ * the soon-to-be-idle CPU as the current CPU is likely busy. ++ * nr_running is checked to avoid unnecessary task stacking. 
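`__ttwu_queue_wakelist()` above pushes the task onto the target CPU's lock-free llist and pokes it with an IPI; the wakee later drains the whole list in `sched_ttwu_pending()`. A userspace C11 analogue of that push/drain idiom, assuming a single global list instead of per-CPU ones and toy structs instead of `struct task_struct` (the drain walks in LIFO order, as llist does):

```c
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct wake_entry {
    struct wake_entry *next;
    int task_id;
};

/* one list per CPU in the scheduler; a single global one here (zero-init == empty) */
static _Atomic(struct wake_entry *) wakelist;

/* waker side: roughly __ttwu_queue_wakelist() minus the IPI */
static void wakelist_push(struct wake_entry *e)
{
    struct wake_entry *old = atomic_load(&wakelist);

    do {
        e->next = old;
    } while (!atomic_compare_exchange_weak(&wakelist, &old, e));
}

/* wakee side: roughly sched_ttwu_pending() grabbing everything at once */
static struct wake_entry *wakelist_grab(void)
{
    return atomic_exchange(&wakelist, NULL);
}

int main(void)
{
    struct wake_entry a = { .task_id = 1 }, b = { .task_id = 2 };

    wakelist_push(&a);
    wakelist_push(&b);

    for (struct wake_entry *e = wakelist_grab(); e; e = e->next)
        printf("activate task %d on this CPU\n", e->task_id);
    return 0;
}
```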
++ */ ++ if ((wake_flags & WF_ON_CPU) && cpu_rq(cpu)->nr_running <= 1) ++ return true; ++ ++ return false; ++} ++ ++static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) ++{ ++ if (__is_defined(ALT_SCHED_TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) { ++ if (WARN_ON_ONCE(cpu == smp_processor_id())) ++ return false; ++ ++ sched_clock_cpu(cpu); /* Sync clocks across CPUs */ ++ __ttwu_queue_wakelist(p, cpu, wake_flags); ++ return true; ++ } ++ ++ return false; ++} ++ ++void wake_up_if_idle(int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ unsigned long flags; ++ ++ rcu_read_lock(); ++ ++ if (!is_idle_task(rcu_dereference(rq->curr))) ++ goto out; ++ ++ if (set_nr_if_polling(rq->idle)) { ++ trace_sched_wake_idle_without_ipi(cpu); ++ } else { ++ raw_spin_lock_irqsave(&rq->lock, flags); ++ if (is_idle_task(rq->curr)) ++ smp_send_reschedule(cpu); ++ /* Else CPU is not idle, do nothing here */ ++ raw_spin_unlock_irqrestore(&rq->lock, flags); ++ } ++ ++out: ++ rcu_read_unlock(); ++} ++ ++bool cpus_share_cache(int this_cpu, int that_cpu) ++{ ++ return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); ++} ++#else /* !CONFIG_SMP */ ++ ++static inline bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) ++{ ++ return false; ++} ++ ++#endif /* CONFIG_SMP */ ++ ++static inline void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ ++ if (ttwu_queue_wakelist(p, cpu, wake_flags)) ++ return; ++ ++ raw_spin_lock(&rq->lock); ++ update_rq_clock(rq); ++ ttwu_do_activate(rq, p, wake_flags); ++ raw_spin_unlock(&rq->lock); ++} ++ +/* + * Notes on Program-Order guarantees on SMP systems. + * @@ -2873,8 +2772,8 @@ index 000000000000..02d7d5a67c77 + * migration. However the means are completely different as there is no lock + * chain to provide order. Instead we do: + * -+ * 1) smp_store_release(X->on_cpu, 0) -+ * 2) smp_cond_load_acquire(!X->on_cpu) ++ * 1) smp_store_release(X->on_cpu, 0) -- finish_task() ++ * 2) smp_cond_load_acquire(!X->on_cpu) -- try_to_wake_up() + * + * Example: + * @@ -2915,51 +2814,94 @@ index 000000000000..02d7d5a67c77 + * + */ + -+/*** ++/** + * try_to_wake_up - wake up a thread + * @p: the thread to be awakened + * @state: the mask of task states that can be woken + * @wake_flags: wake modifier flags (WF_*) + * -+ * Put it on the run-queue if it's not already there. The "current" -+ * thread is always on the run-queue (except when the actual -+ * re-schedule is in progress), and as such you're allowed to do -+ * the simpler "current->state = TASK_RUNNING" to mark yourself -+ * runnable without the overhead of this. ++ * Conceptually does: + * -+ * Return: %true if @p was woken up, %false if it was already running. -+ * or @state didn't match @p's state. ++ * If (@state & @p->state) @p->state = TASK_RUNNING. ++ * ++ * If the task was not queued/runnable, also place it back on a runqueue. ++ * ++ * This function is atomic against schedule() which would dequeue the task. ++ * ++ * It issues a full memory barrier before accessing @p->state, see the comment ++ * with set_current_state(). ++ * ++ * Uses p->pi_lock to serialize against concurrent wake-ups. ++ * ++ * Relies on p->pi_lock stabilizing: ++ * - p->sched_class ++ * - p->cpus_ptr ++ * - p->sched_task_group ++ * in order to do migration, see its use of select_task_rq()/set_task_cpu(). ++ * ++ * Tries really hard to only take one task_rq(p)->lock for performance. 
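The "Notes on Program-Order guarantees" that begin above rely on the `smp_store_release()` / `smp_cond_load_acquire()` pairing on `p->on_cpu` between `finish_task()` and `try_to_wake_up()`. A userspace C11 sketch of that handoff, with `memory_order_release`/`memory_order_acquire` standing in for the kernel primitives (compile with `-pthread`; the busy-wait is for brevity):

```c
#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

static _Atomic int on_cpu = 1;   /* "task is still running on its old CPU" */
static int task_state;           /* plain data published by the release/acquire pair */

static void *old_cpu(void *unused)
{
    task_state = 42;             /* writes done while on_cpu == 1 */
    /* ~ smp_store_release(&p->on_cpu, 0) in finish_task() */
    atomic_store_explicit(&on_cpu, 0, memory_order_release);
    return NULL;
}

static void *waker(void *unused)
{
    /* ~ smp_cond_load_acquire(&p->on_cpu, !VAL) in try_to_wake_up() */
    while (atomic_load_explicit(&on_cpu, memory_order_acquire))
        ;
    printf("saw task_state=%d after acquire\n", task_state);   /* guaranteed to see 42 */
    return NULL;
}

int main(void)
{
    pthread_t a, b;

    pthread_create(&a, NULL, waker, NULL);
    pthread_create(&b, NULL, old_cpu, NULL);
    pthread_join(a, NULL);
    pthread_join(b, NULL);
    return 0;
}
```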
++ * Takes rq->lock in: ++ * - ttwu_runnable() -- old rq, unavoidable, see comment there; ++ * - ttwu_queue() -- new rq, for enqueue of the task; ++ * - psi_ttwu_dequeue() -- much sadness :-( accounting will kill us. ++ * ++ * As a consequence we race really badly with just about everything. See the ++ * many memory barriers and their comments for details. ++ * ++ * Return: %true if @p->state changes (an actual wakeup was done), ++ * %false otherwise. + */ +static int try_to_wake_up(struct task_struct *p, unsigned int state, + int wake_flags) +{ + unsigned long flags; -+ struct rq *rq; + int cpu, success = 0; + ++ preempt_disable(); ++ if (p == current) { ++ /* ++ * We're waking current, this means 'p->on_rq' and 'task_cpu(p) ++ * == smp_processor_id()'. Together this means we can special ++ * case the whole 'p->on_rq && ttwu_runnable()' case below ++ * without taking any locks. ++ * ++ * In particular: ++ * - we rely on Program-Order guarantees for all the ordering, ++ * - we're serialized against set_special_state() by virtue of ++ * it disabling IRQs (this allows not taking ->pi_lock). ++ */ ++ if (!(p->state & state)) ++ goto out; ++ ++ success = 1; ++ trace_sched_waking(p); ++ p->state = TASK_RUNNING; ++ trace_sched_wakeup(p); ++ goto out; ++ } ++ + /* + * If we are going to wake up a thread waiting for CONDITION we + * need to ensure that CONDITION=1 done by the caller can not be -+ * reordered with p->state check below. This pairs with mb() in -+ * set_current_state() the waiting thread does. ++ * reordered with p->state check below. This pairs with smp_store_mb() ++ * in set_current_state() that the waiting thread does. + */ + raw_spin_lock_irqsave(&p->pi_lock, flags); + smp_mb__after_spinlock(); + if (!(p->state & state)) -+ goto out; ++ goto unlock; + + trace_sched_waking(p); + + /* We're going to change ->state: */ + success = 1; -+ cpu = task_cpu(p); + + /* + * Ensure we load p->on_rq _after_ p->state, otherwise it would + * be possible to, falsely, observe p->on_rq == 0 and get stuck + * in smp_cond_load_acquire() below. + * -+ * flush_smp_call_function_from_idle() try_to_wake_up() ++ * sched_ttwu_pending() try_to_wake_up() + * STORE p->on_rq = 1 LOAD p->state + * UNLOCK rq->lock + * @@ -2973,10 +2915,17 @@ index 000000000000..02d7d5a67c77 + * + * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in + * __schedule(). See the comment for smp_mb__after_spinlock(). ++ * ++ * A similar smb_rmb() lives in try_invoke_on_locked_down_task(). + */ + smp_rmb(); -+ if (p->on_rq && ttwu_remote(p, wake_flags)) -+ goto stat; ++ if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags)) ++ goto unlock; ++ ++ if (p->in_iowait) { ++ delayacct_blkio_end(p); ++ atomic_dec(&task_rq(p)->nr_iowait); ++ } + +#ifdef CONFIG_SMP + /* @@ -2997,8 +2946,43 @@ index 000000000000..02d7d5a67c77 + * + * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in + * __schedule(). See the comment for smp_mb__after_spinlock(). ++ * ++ * Form a control-dep-acquire with p->on_rq == 0 above, to ensure ++ * schedule()'s deactivate_task() has 'happened' and p will no longer ++ * care about it's own p->state. See the comment in __schedule(). + */ -+ smp_rmb(); ++ smp_acquire__after_ctrl_dep(); ++ ++ /* ++ * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq ++ * == 0), which means we need to do an enqueue, change p->state to ++ * TASK_WAKING such that we can unlock p->pi_lock before doing the ++ * enqueue, such as ttwu_queue_wakelist(). 
++ */ ++ p->state = TASK_WAKING; ++ ++ /* ++ * If the owning (remote) CPU is still in the middle of schedule() with ++ * this task as prev, considering queueing p on the remote CPUs wake_list ++ * which potentially sends an IPI instead of spinning on p->on_cpu to ++ * let the waker make forward progress. This is safe because IRQs are ++ * disabled and the IPI will deliver after on_cpu is cleared. ++ * ++ * Ensure we load task_cpu(p) after p->on_cpu: ++ * ++ * set_task_cpu(p, cpu); ++ * STORE p->cpu = @cpu ++ * __schedule() (switch to task 'p') ++ * LOCK rq->lock ++ * smp_mb__after_spin_lock() smp_cond_load_acquire(&p->on_cpu) ++ * STORE p->on_cpu = 1 LOAD p->cpu ++ * ++ * to ensure we observe the correct CPU on which the task is currently ++ * scheduling. ++ */ ++ if (smp_load_acquire(&p->on_cpu) && ++ ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU)) ++ goto unlock; + + /* + * If the owning (remote) CPU is still in the middle of schedule() with @@ -3011,47 +2995,26 @@ index 000000000000..02d7d5a67c77 + */ + smp_cond_load_acquire(&p->on_cpu, !VAL); + -+ p->sched_contributes_to_load = !!task_contributes_to_load(p); -+ p->state = TASK_WAKING; ++ sched_task_ttwu(p); + -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+ -+ if (SCHED_ISO == p->policy && ISO_PRIO != p->prio) { -+ p->prio = ISO_PRIO; -+ p->deadline = 0UL; -+ update_task_priodl(p); -+ } -+ -+ cpu = select_task_rq(p); ++ cpu = select_task_rq(p, this_rq()); + + if (cpu != task_cpu(p)) { + wake_flags |= WF_MIGRATED; + psi_ttwu_dequeue(p); + set_task_cpu(p, cpu); + } -+#else /* CONFIG_SMP */ -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+#endif ++#else ++ cpu = task_cpu(p); ++#endif /* CONFIG_SMP */ + -+ rq = cpu_rq(cpu); -+ raw_spin_lock(&rq->lock); -+ -+ update_rq_clock(rq); -+ ttwu_do_activate(rq, p, wake_flags); -+ check_preempt_curr(rq, p); -+ -+ raw_spin_unlock(&rq->lock); -+ -+stat: -+ ttwu_stat(p, cpu, wake_flags); -+out: ++ ttwu_queue(p, cpu, wake_flags); ++unlock: + raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++out: ++ if (success) ++ ttwu_stat(p, task_cpu(p), wake_flags); ++ preempt_enable(); + + return success; +} @@ -3086,7 +3049,7 @@ index 000000000000..02d7d5a67c77 + rq = __task_rq_lock(p, &rf); + if (task_rq(p) == rq) + ret = func(p, arg); -+ rq_unlock(rq, &rf); ++ __task_rq_unlock(rq, &rf); + } else { + switch (p->state) { + case TASK_RUNNING: @@ -3127,30 +3090,38 @@ index 000000000000..02d7d5a67c77 +/* + * Perform scheduler related setup for a newly forked process p. + * p is forked by current. 
++ * ++ * __sched_fork() is basic setup used by init_idle() too: + */ -+int sched_fork(unsigned long __maybe_unused clone_flags, struct task_struct *p) ++static inline void __sched_fork(unsigned long clone_flags, struct task_struct *p) +{ -+ unsigned long flags; -+ int cpu = get_cpu(); -+ struct rq *rq = this_rq(); ++ p->on_rq = 0; ++ p->on_cpu = 0; ++ p->utime = 0; ++ p->stime = 0; ++ p->sched_time = 0; + +#ifdef CONFIG_PREEMPT_NOTIFIERS + INIT_HLIST_HEAD(&p->preempt_notifiers); +#endif -+ /* Should be reset in fork.c but done here for ease of PDS patching */ -+ p->on_cpu = -+ p->on_rq = -+ p->utime = -+ p->stime = -+ p->sched_time = 0; -+ -+ p->sl_level = pds_skiplist_random_level(p); -+ INIT_SKIPLIST_NODE(&p->sl_node); + +#ifdef CONFIG_COMPACTION + p->capture_control = NULL; +#endif ++#ifdef CONFIG_SMP ++ p->wake_entry.u_flags = CSD_TYPE_TTWU; ++#endif ++} + ++/* ++ * fork()/clone()-time setup: ++ */ ++int sched_fork(unsigned long clone_flags, struct task_struct *p) ++{ ++ unsigned long flags; ++ struct rq *rq; ++ ++ __sched_fork(clone_flags, p); + /* + * We mark the process as NEW here. This guarantees that + * nobody will actually run it, and a signal or other external @@ -3184,26 +3155,6 @@ index 000000000000..02d7d5a67c77 + } + + /* -+ * Share the timeslice between parent and child, thus the -+ * total amount of pending timeslices in the system doesn't change, -+ * resulting in more scheduling fairness. -+ */ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ rq->curr->time_slice /= 2; -+ p->time_slice = rq->curr->time_slice; -+#ifdef CONFIG_SCHED_HRTICK -+ hrtick_start(rq, US_TO_NS(rq->curr->time_slice)); -+#endif -+ -+ if (p->time_slice < RESCHED_US) { -+ update_rq_clock(rq); -+ time_slice_expired(p, rq); -+ resched_curr(rq); -+ } else -+ update_task_priodl(p); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ /* + * The child is not yet in the pid-hash so no cgroup attach races, + * and the cgroup is pinned to this child due to cgroup_fork() + * is ran before sched_fork(). @@ -3212,10 +3163,32 @@ index 000000000000..02d7d5a67c77 + */ + raw_spin_lock_irqsave(&p->pi_lock, flags); + /* ++ * Share the timeslice between parent and child, thus the ++ * total amount of pending timeslices in the system doesn't change, ++ * resulting in more scheduling fairness. ++ */ ++ rq = this_rq(); ++ raw_spin_lock(&rq->lock); ++ ++ rq->curr->time_slice /= 2; ++ p->time_slice = rq->curr->time_slice; ++#ifdef CONFIG_SCHED_HRTICK ++ hrtick_start(rq, rq->curr->time_slice); ++#endif ++ ++ if (p->time_slice < RESCHED_NS) { ++ p->time_slice = sched_timeslice_ns; ++ resched_curr(rq); ++ } ++ sched_task_fork(p, rq); ++ raw_spin_unlock(&rq->lock); ++ ++ rseq_migrate(p); ++ /* + * We're setting the CPU for the first time, we don't migrate, + * so use __set_task_cpu(). 
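The `sched_fork()` hunk above halves the parent's remaining slice between parent and child, and refills the child plus reschedules the parent when what is left drops under `RESCHED_NS`. A toy sketch of just that arithmetic; the two constants are invented stand-ins for `sched_timeslice_ns` and `RESCHED_NS`:

```c
#include <stdio.h>

#define SCHED_TIMESLICE_NS 4000000ULL   /* assumption: 4 ms default slice */
#define RESCHED_NS          100000ULL   /* assumption: "too little left" threshold */

struct toy_task { unsigned long long time_slice; };

/* returns 1 when the parent's rq should be rescheduled, mirroring the hunk above */
static int fork_split_slice(struct toy_task *parent, struct toy_task *child)
{
    parent->time_slice /= 2;                 /* parent keeps half ...             */
    child->time_slice = parent->time_slice;  /* ... child inherits the other half */

    if (child->time_slice < RESCHED_NS) {    /* nearly expired: give a fresh slice */
        child->time_slice = SCHED_TIMESLICE_NS;
        return 1;                            /* and resched the current task       */
    }
    return 0;
}

int main(void)
{
    struct toy_task parent = { .time_slice = 150000ULL }, child = { 0 };
    int resched = fork_split_slice(&parent, &child);

    printf("parent=%lluns child=%lluns resched=%d\n",
           parent.time_slice, child.time_slice, resched);
    return 0;
}
```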
+ */ -+ __set_task_cpu(p, cpu); ++ __set_task_cpu(p, cpu_of(rq)); + raw_spin_unlock_irqrestore(&p->pi_lock, flags); + +#ifdef CONFIG_SCHED_INFO @@ -3224,10 +3197,11 @@ index 000000000000..02d7d5a67c77 +#endif + init_task_preempt_count(p); + -+ put_cpu(); + return 0; +} + ++void sched_post_fork(struct task_struct *p) {} ++ +#ifdef CONFIG_SCHEDSTATS + +DEFINE_STATIC_KEY_FALSE(sched_schedstats); @@ -3321,11 +3295,12 @@ index 000000000000..02d7d5a67c77 + + p->state = TASK_RUNNING; + -+ rq = cpu_rq(select_task_rq(p)); ++ rq = cpu_rq(select_task_rq(p, this_rq())); +#ifdef CONFIG_SMP ++ rseq_migrate(p); + /* + * Fork balancing, do it here and not earlier because: -+ * - cpus_mask can change in the fork path ++ * - cpus_ptr can change in the fork path + * - any previously selected CPU might disappear through hotplug + * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, + * as we're not fully set-up yet. @@ -3338,7 +3313,7 @@ index 000000000000..02d7d5a67c77 + update_rq_clock(rq); + activate_task(p, rq); + trace_sched_wakeup_new(p); -+ check_preempt_curr(rq, p); ++ check_preempt_curr(rq); + + raw_spin_unlock(&rq->lock); + raw_spin_unlock_irqrestore(&p->pi_lock, flags); @@ -3436,16 +3411,19 @@ index 000000000000..02d7d5a67c77 + /* + * Claim the task as running, we do this before switching to it + * such that any running task will have this set. ++ * ++ * See the ttwu() WF_ON_CPU case and its ordering comment. + */ -+ next->on_cpu = 1; ++ WRITE_ONCE(next->on_cpu, 1); +} + +static inline void finish_task(struct task_struct *prev) +{ +#ifdef CONFIG_SMP + /* -+ * After ->on_cpu is cleared, the task can be moved to a different CPU. -+ * We must ensure this doesn't happen until the switch is completely ++ * This must be the very last reference to @prev from this CPU. After ++ * p->on_cpu is cleared, the task can be moved to a different CPU. We ++ * must ensure this doesn't happen until the switch is completely + * finished. + * + * In particular, the load of prev->state in finish_task_switch() must @@ -3793,21 +3771,57 @@ index 000000000000..02d7d5a67c77 + return sum; +} + ++#ifdef CONFIG_SMP ++ ++/* ++ * sched_exec - execve() is a valuable balancing opportunity, because at ++ * this point the task has the smallest effective memory and cache ++ * footprint. 
++ */ ++void sched_exec(void) ++{ ++ struct task_struct *p = current; ++ unsigned long flags; ++ int dest_cpu; ++ struct rq *rq; ++ ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ rq = this_rq(); ++ ++ if (rq != task_rq(p) || rq->nr_running < 2) ++ goto unlock; ++ ++ dest_cpu = select_task_rq(p, task_rq(p)); ++ if (dest_cpu == smp_processor_id()) ++ goto unlock; ++ ++ if (likely(cpu_active(dest_cpu))) { ++ struct migration_arg arg = { p, dest_cpu }; ++ ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); ++ return; ++ } ++unlock: ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++} ++ ++#endif ++ +DEFINE_PER_CPU(struct kernel_stat, kstat); +DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat); + +EXPORT_PER_CPU_SYMBOL(kstat); +EXPORT_PER_CPU_SYMBOL(kernel_cpustat); + -+static inline void pds_update_curr(struct rq *rq, struct task_struct *p) ++static inline void update_curr(struct rq *rq, struct task_struct *p) +{ + s64 ns = rq->clock_task - p->last_ran; + + p->sched_time += ns; + account_group_exec_runtime(p, ns); + -+ /* time_slice accounting is done in usecs to avoid overflow on 32bit */ -+ p->time_slice -= NS_TO_US(ns); ++ p->time_slice -= ns; + p->last_ran = rq->clock_task; +} + @@ -3847,7 +3861,7 @@ index 000000000000..02d7d5a67c77 + */ + if (p == rq->curr && task_on_rq_queued(p)) { + update_rq_clock(rq); -+ pds_update_curr(rq, p); ++ update_curr(rq, p); + } + ns = tsk_seruntime(p); + task_access_unlock_irqrestore(p, lock, &flags); @@ -3856,126 +3870,26 @@ index 000000000000..02d7d5a67c77 +} + +/* This manages tasks that have run out of timeslice during a scheduler_tick */ -+static inline void pds_scheduler_task_tick(struct rq *rq) ++static inline void scheduler_task_tick(struct rq *rq) +{ + struct task_struct *p = rq->curr; + + if (is_idle_task(p)) + return; + -+ pds_update_curr(rq, p); -+ ++ update_curr(rq, p); + cpufreq_update_util(rq, 0); + + /* -+ * Tasks that were scheduled in the first half of a tick are not -+ * allowed to run into the 2nd half of the next tick if they will -+ * run out of time slice in the interim. Otherwise, if they have -+ * less than RESCHED_US μs of time slice left they will be rescheduled. ++ * Tasks have less than RESCHED_NS of time slice left they will be ++ * rescheduled. + */ -+ if (p->time_slice - rq->dither >= RESCHED_US) ++ if (p->time_slice >= RESCHED_NS) + return; -+ -+ /** -+ * p->time_slice < RESCHED_US. 
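`update_curr()` and `task_sched_runtime()` above keep per-task runtime in plain nanoseconds; this is the kind of accounting exposed to userspace through the per-thread CPU-time clock read with `clock_gettime(CLOCK_THREAD_CPUTIME_ID, ...)`. A small standalone probe:

```c
#include <stdio.h>
#include <time.h>

int main(void)
{
    struct timespec ts;
    volatile unsigned long long x = 0;

    for (unsigned long long i = 0; i < 50000000ULL; i++)   /* burn some CPU time */
        x += i;

    if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0) {
        perror("clock_gettime");
        return 1;
    }
    printf("thread CPU time: %ld.%09ld s (x=%llu)\n",
           (long)ts.tv_sec, ts.tv_nsec, x);
    return 0;
}
```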
We will modify task_struct under -+ * rq lock as p is rq->curr -+ */ -+ __set_tsk_resched(p); ++ set_tsk_need_resched(p); ++ set_preempt_need_resched(); +} + -+#ifdef CONFIG_SMP -+ -+#ifdef CONFIG_SCHED_SMT -+static int active_load_balance_cpu_stop(void *data) -+{ -+ struct rq *rq = this_rq(); -+ struct task_struct *p = data; -+ int cpu; -+ unsigned long flags; -+ -+ local_irq_save(flags); -+ -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ -+ rq->active_balance = 0; -+ /* -+ * _something_ may have changed the task, double check again -+ */ -+ if (task_on_rq_queued(p) && task_rq(p) == rq && -+ (cpu = cpumask_any_and(&p->cpus_mask, &sched_cpu_sg_idle_mask)) < nr_cpu_ids) -+ rq = __migrate_task(rq, p, cpu); -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ local_irq_restore(flags); -+ -+ return 0; -+} -+ -+/* pds_sg_balance_trigger - trigger slibing group balance for @cpu */ -+static void pds_sg_balance_trigger(const int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ struct task_struct *curr; -+ -+ if (!raw_spin_trylock_irqsave(&rq->lock, flags)) -+ return; -+ curr = rq->curr; -+ if (!is_idle_task(curr) && -+ cpumask_intersects(&curr->cpus_mask, &sched_cpu_sg_idle_mask)) { -+ int active_balance = 0; -+ -+ if (likely(!rq->active_balance)) { -+ rq->active_balance = 1; -+ active_balance = 1; -+ } -+ -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ if (likely(active_balance)) -+ stop_one_cpu_nowait(cpu, active_load_balance_cpu_stop, -+ curr, &rq->active_balance_work); -+ } else -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+} -+ -+/* -+ * pds_sg_balance_check - slibing group balance check for run queue @rq -+ */ -+static inline void pds_sg_balance_check(const struct rq *rq) -+{ -+ cpumask_t chk; -+ int i; -+ -+ /* Only online cpu will do sg balance checking */ -+ if (unlikely(!rq->online)) -+ return; -+ -+ /* Only cpu in slibing idle group will do the checking */ -+ if (!cpumask_test_cpu(cpu_of(rq), &sched_cpu_sg_idle_mask)) -+ return; -+ -+ /* Find potential cpus which can migrate the currently running task */ -+ if (!cpumask_andnot(&chk, &sched_rq_pending_masks[SCHED_RQ_EMPTY], -+ &sched_rq_queued_masks[SCHED_RQ_EMPTY])) -+ return; -+ -+ for_each_cpu(i, &chk) { -+ /* skip the cpu which has idle slibing cpu */ -+ if (cpumask_test_cpu(per_cpu(sched_sibling_cpu, i), -+ &sched_rq_queued_masks[SCHED_RQ_EMPTY])) -+ continue; -+ pds_sg_balance_trigger(i); -+ } -+} -+DEFINE_PER_CPU(unsigned long, thermal_pressure); -+ -+#endif /* CONFIG_SCHED_SMT */ -+#endif /* CONFIG_SMP */ -+ +/* + * This function gets called by the timer code, with HZ frequency. + * We call it with interrupts disabled. 
@@ -3991,8 +3905,7 @@ index 000000000000..02d7d5a67c77 + raw_spin_lock(&rq->lock); + update_rq_clock(rq); + -+ pds_scheduler_task_tick(rq); -+ update_sched_rq_queued_masks_normal(rq); ++ scheduler_task_tick(rq); + calc_global_load_tick(rq); + psi_task_tick(rq); + @@ -4002,7 +3915,100 @@ index 000000000000..02d7d5a67c77 + perf_event_task_tick(); +} + ++#ifdef CONFIG_SCHED_SMT ++static inline int active_load_balance_cpu_stop(void *data) ++{ ++ struct rq *rq = this_rq(); ++ struct task_struct *p = data; ++ cpumask_t tmp; ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ ++ raw_spin_lock(&p->pi_lock); ++ raw_spin_lock(&rq->lock); ++ ++ rq->active_balance = 0; ++ /* _something_ may have changed the task, double check again */ ++ if (task_on_rq_queued(p) && task_rq(p) == rq && ++ cpumask_and(&tmp, p->cpus_ptr, &sched_sg_idle_mask)) { ++ int cpu = cpu_of(rq); ++ int dcpu = __best_mask_cpu(cpu, &tmp, ++ per_cpu(sched_cpu_llc_mask, cpu)); ++ rq = move_queued_task(rq, p, dcpu); ++ } ++ ++ raw_spin_unlock(&rq->lock); ++ raw_spin_unlock(&p->pi_lock); ++ ++ local_irq_restore(flags); ++ ++ return 0; ++} ++ ++/* sg_balance_trigger - trigger slibing group balance for @cpu */ ++static inline int sg_balance_trigger(const int cpu) ++{ ++ struct rq *rq= cpu_rq(cpu); ++ unsigned long flags; ++ struct task_struct *curr; ++ int res; ++ ++ if (!raw_spin_trylock_irqsave(&rq->lock, flags)) ++ return 0; ++ curr = rq->curr; ++ res = (!is_idle_task(curr)) && (1 == rq->nr_running) &&\ ++ cpumask_intersects(curr->cpus_ptr, &sched_sg_idle_mask) &&\ ++ (!rq->active_balance); ++ ++ if (res) ++ rq->active_balance = 1; ++ ++ raw_spin_unlock_irqrestore(&rq->lock, flags); ++ ++ if (res) ++ stop_one_cpu_nowait(cpu, active_load_balance_cpu_stop, ++ curr, &rq->active_balance_work); ++ return res; ++} ++ ++/* ++ * sg_balance_check - slibing group balance check for run queue @rq ++ */ ++static inline void sg_balance_check(struct rq *rq) ++{ ++ cpumask_t chk; ++ int cpu; ++ ++ /* exit when no sg in idle */ ++ if (cpumask_empty(&sched_sg_idle_mask)) ++ return; ++ ++ cpu = cpu_of(rq); ++ /* ++ * Only cpu in slibing idle group will do the checking and then ++ * find potential cpus which can migrate the current running task ++ */ ++ if (cpumask_test_cpu(cpu, &sched_sg_idle_mask) && ++ cpumask_andnot(&chk, cpu_online_mask, &sched_rq_pending_mask) && ++ cpumask_andnot(&chk, &chk, &sched_rq_watermark[IDLE_WM])) { ++ int i, tried = 0; ++ ++ for_each_cpu_wrap(i, &chk, cpu) { ++ if (cpumask_subset(cpu_smt_mask(i), &chk)) { ++ if (sg_balance_trigger(i)) ++ return; ++ if (tried) ++ return; ++ tried++; ++ } ++ } ++ } ++} ++#endif /* CONFIG_SCHED_SMT */ ++ +#ifdef CONFIG_NO_HZ_FULL ++ +struct tick_work { + int cpu; + atomic_t state; @@ -4073,10 +4079,9 @@ index 000000000000..02d7d5a67c77 + delta = rq_clock_task(rq) - curr->last_ran; + WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); + } -+ pds_scheduler_task_tick(rq); -+ update_sched_rq_queued_masks_normal(rq); -+ calc_load_nohz_remote(rq); ++ scheduler_task_tick(rq); + ++ calc_load_nohz_remote(rq); +out_unlock: + raw_spin_unlock_irqrestore(&rq->lock, flags); + @@ -4216,172 +4221,6 @@ index 000000000000..02d7d5a67c77 +static inline void preempt_latency_stop(int val) { } +#endif + -+/* -+ * Timeslices below RESCHED_US are considered as good as expired as there's no -+ * point rescheduling when there's so little time left. 
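`sg_balance_trigger()` above uses a trylock plus an `active_balance` flag so that at most one migration request is in flight per run queue, and only dispatches the actual stop-machine work after dropping the lock. A loose userspace analogue of that claim-then-dispatch pattern with a pthread mutex; the names are invented and the `printf` stands in for `stop_one_cpu_nowait()`:

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_rq {
    pthread_mutex_t lock;
    bool active_balance;     /* a balance operation is already in flight */
    int nr_running;
};

/* mirrors the shape: trylock, test, claim the slot, then act outside the lock */
static int trigger_balance(struct toy_rq *rq)
{
    int fire = 0;

    if (pthread_mutex_trylock(&rq->lock) != 0)
        return 0;                         /* somebody else holds the lock: skip this round */

    if (rq->nr_running == 1 && !rq->active_balance) {
        rq->active_balance = true;        /* claim while still holding the lock */
        fire = 1;
    }
    pthread_mutex_unlock(&rq->lock);

    if (fire)
        printf("dispatch migration work for this rq\n");
    return fire;
}

int main(void)
{
    struct toy_rq rq = { PTHREAD_MUTEX_INITIALIZER, false, 1 };

    printf("first trigger:  %d\n", trigger_balance(&rq));
    printf("second trigger: %d\n", trigger_balance(&rq));   /* already in flight: no-op */
    return 0;
}
```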
SCHED_BATCH tasks -+ * have been flagged be not latency sensitive and likely to be fully CPU -+ * bound so every time they're rescheduled they have their time_slice -+ * refilled, but get a new later deadline to have little effect on -+ * SCHED_NORMAL tasks. -+ -+ */ -+static inline void check_deadline(struct task_struct *p, struct rq *rq) -+{ -+ if (rq->idle == p) -+ return; -+ -+ pds_update_curr(rq, p); -+ -+ if (p->time_slice < RESCHED_US) { -+ time_slice_expired(p, rq); -+ if (SCHED_ISO == p->policy && ISO_PRIO == p->prio) { -+ p->prio = NORMAL_PRIO; -+ p->deadline = rq->clock + task_deadline_diff(p); -+ update_task_priodl(p); -+ } -+ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) -+ requeue_task(p, rq); -+ } -+} -+ -+#ifdef CONFIG_SMP -+ -+#define SCHED_RQ_NR_MIGRATION (32UL) -+/* -+ * Migrate pending tasks in @rq to @dest_cpu -+ * Will try to migrate mininal of half of @rq nr_running tasks and -+ * SCHED_RQ_NR_MIGRATION to @dest_cpu -+ */ -+static inline int -+migrate_pending_tasks(struct rq *rq, struct rq *dest_rq, int filter_prio) -+{ -+ struct task_struct *p; -+ int dest_cpu = cpu_of(dest_rq); -+ int nr_migrated = 0; -+ int nr_tries = min((rq->nr_running + 1) / 2, SCHED_RQ_NR_MIGRATION); -+ struct skiplist_node *node = rq->sl_header.next[0]; -+ -+ while (nr_tries && node != &rq->sl_header) { -+ p = skiplist_entry(node, struct task_struct, sl_node); -+ node = node->next[0]; -+ -+ if (task_running(p)) -+ continue; -+ if (p->prio >= filter_prio) -+ break; -+ if (cpumask_test_cpu(dest_cpu, &p->cpus_mask)) { -+ dequeue_task(p, rq, 0); -+ set_task_cpu(p, dest_cpu); -+ enqueue_task(p, dest_rq, 0); -+ nr_migrated++; -+ } -+ nr_tries--; -+ /* make a jump */ -+ if (node == &rq->sl_header) -+ break; -+ node = node->next[0]; -+ } -+ -+ return nr_migrated; -+} -+ -+static inline int -+take_queued_task_cpumask(struct rq *rq, cpumask_t *chk_mask, int filter_prio) -+{ -+ int src_cpu; -+ -+ for_each_cpu(src_cpu, chk_mask) { -+ int nr_migrated; -+ struct rq *src_rq = cpu_rq(src_cpu); -+ -+ if (!do_raw_spin_trylock(&src_rq->lock)) { -+ if (PRIO_LIMIT == filter_prio) -+ continue; -+ return 0; -+ } -+ spin_acquire(&src_rq->lock.dep_map, SINGLE_DEPTH_NESTING, 1, _RET_IP_); -+ -+ update_rq_clock(src_rq); -+ if ((nr_migrated = migrate_pending_tasks(src_rq, rq, filter_prio))) -+ cpufreq_update_this_cpu(rq, 0); -+ -+ spin_release(&src_rq->lock.dep_map, _RET_IP_); -+ do_raw_spin_unlock(&src_rq->lock); -+ -+ if (nr_migrated || PRIO_LIMIT != filter_prio) -+ return nr_migrated; -+ } -+ return 0; -+} -+ -+static inline int take_other_rq_task(struct rq *rq, int cpu, int filter_prio) -+{ -+ struct cpumask *affinity_mask, *end; -+ struct cpumask chk; -+ -+ if (PRIO_LIMIT == filter_prio) { -+ cpumask_complement(&chk, &sched_rq_pending_masks[SCHED_RQ_EMPTY]); -+#ifdef CONFIG_SMT_NICE -+ { -+ /* also try to take IDLE priority tasks from smt supressed cpu */ -+ struct cpumask t; -+ if (cpumask_and(&t, &sched_smt_supressed_mask, -+ &sched_rq_queued_masks[SCHED_RQ_IDLE])) -+ cpumask_or(&chk, &chk, &t); -+ } -+#endif -+ } else if (NORMAL_PRIO == filter_prio) { -+ cpumask_or(&chk, &sched_rq_pending_masks[SCHED_RQ_RT], -+ &sched_rq_pending_masks[SCHED_RQ_ISO]); -+ } else if (IDLE_PRIO == filter_prio) { -+ cpumask_complement(&chk, &sched_rq_pending_masks[SCHED_RQ_EMPTY]); -+ cpumask_andnot(&chk, &chk, &sched_rq_pending_masks[SCHED_RQ_IDLE]); -+ } else -+ cpumask_copy(&chk, &sched_rq_pending_masks[SCHED_RQ_RT]); -+ -+ if (cpumask_empty(&chk)) -+ return 0; -+ -+ affinity_mask = per_cpu(sched_cpu_llc_start_mask, cpu); -+ 
end = per_cpu(sched_cpu_affinity_chk_end_masks, cpu); -+ do { -+ struct cpumask tmp; -+ -+ if (cpumask_and(&tmp, &chk, affinity_mask) && -+ take_queued_task_cpumask(rq, &tmp, filter_prio)) -+ return 1; -+ } while (++affinity_mask < end); -+ -+ return 0; -+} -+#endif -+ -+static inline struct task_struct * -+choose_next_task(struct rq *rq, int cpu, struct task_struct *prev) -+{ -+ struct task_struct *next = rq_first_queued_task(rq); -+ -+#ifdef CONFIG_SMT_NICE -+ if (cpumask_test_cpu(cpu, &sched_smt_supressed_mask)) { -+ if (next->prio >= IDLE_PRIO) { -+ if (rq->online && -+ take_other_rq_task(rq, cpu, IDLE_PRIO)) -+ return rq_first_queued_task(rq); -+ return rq->idle; -+ } -+ } -+#endif -+ -+#ifdef CONFIG_SMP -+ if (likely(rq->online)) -+ if (take_other_rq_task(rq, cpu, next->prio)) { -+ resched_curr(rq); -+ return rq_first_queued_task(rq); -+ } -+#endif -+ return next; -+} -+ +static inline unsigned long get_preempt_disable_ip(struct task_struct *p) +{ +#ifdef CONFIG_DEBUG_PREEMPT @@ -4429,6 +4268,9 @@ index 000000000000..02d7d5a67c77 +#ifdef CONFIG_SCHED_STACK_END_CHECK + if (task_stack_end_corrupted(prev)) + panic("corrupted stack end detected inside scheduler\n"); ++ ++ if (task_scs_end_corrupted(prev)) ++ panic("corrupted shadow stack detected inside scheduler\n"); +#endif + +#ifdef CONFIG_DEBUG_ATOMIC_SLEEP @@ -4451,16 +4293,166 @@ index 000000000000..02d7d5a67c77 + schedstat_inc(this_rq()->sched_count); +} + -+static inline void set_rq_task(struct rq *rq, struct task_struct *p) -+{ -+ p->last_ran = rq->clock_task; ++/* ++ * Compile time debug macro ++ * #define ALT_SCHED_DEBUG ++ */ + -+#ifdef CONFIG_HIGH_RES_TIMERS -+ if (p != rq->idle) -+ hrtick_start(rq, US_TO_NS(p->time_slice)); ++#ifdef ALT_SCHED_DEBUG ++void alt_sched_debug(void) ++{ ++ printk(KERN_INFO "sched: pending: 0x%04lx, idle: 0x%04lx, sg_idle: 0x%04lx\n", ++ sched_rq_pending_mask.bits[0], ++ sched_rq_watermark[IDLE_WM].bits[0], ++ sched_sg_idle_mask.bits[0]); ++} ++#else ++inline void alt_sched_debug(void) {} +#endif -+ /* update rq->dither */ -+ rq->dither = rq_dither(rq); ++ ++#ifdef CONFIG_SMP ++ ++#define SCHED_RQ_NR_MIGRATION (32UL) ++/* ++ * Migrate pending tasks in @rq to @dest_cpu ++ * Will try to migrate mininal of half of @rq nr_running tasks and ++ * SCHED_RQ_NR_MIGRATION to @dest_cpu ++ */ ++static inline int ++migrate_pending_tasks(struct rq *rq, struct rq *dest_rq, const int dest_cpu) ++{ ++ struct task_struct *p, *skip = rq->curr; ++ int nr_migrated = 0; ++ int nr_tries = min(rq->nr_running / 2, SCHED_RQ_NR_MIGRATION); ++ ++ while (skip != rq->idle && nr_tries && ++ (p = sched_rq_next_task(skip, rq)) != rq->idle) { ++ skip = sched_rq_next_task(p, rq); ++ if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) { ++ __SCHED_DEQUEUE_TASK(p, rq, 0, ); ++ set_task_cpu(p, dest_cpu); ++ __SCHED_ENQUEUE_TASK(p, dest_rq, 0); ++ nr_migrated++; ++ } ++ nr_tries--; ++ } ++ ++ return nr_migrated; ++} ++ ++static inline int take_other_rq_tasks(struct rq *rq, int cpu) ++{ ++ struct cpumask *affinity_mask, *end_mask; ++ ++ if (unlikely(!rq->online)) ++ return 0; ++ ++ if (cpumask_empty(&sched_rq_pending_mask)) ++ return 0; ++ ++ affinity_mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); ++ end_mask = per_cpu(sched_cpu_affinity_end_mask, cpu); ++ do { ++ int i; ++ for_each_cpu_and(i, &sched_rq_pending_mask, affinity_mask) { ++ int nr_migrated; ++ struct rq *src_rq; ++ ++ src_rq = cpu_rq(i); ++ if (!do_raw_spin_trylock(&src_rq->lock)) ++ continue; ++ spin_acquire(&src_rq->lock.dep_map, ++ SINGLE_DEPTH_NESTING, 1, _RET_IP_); ++ 
++ if ((nr_migrated = migrate_pending_tasks(src_rq, rq, cpu))) { ++ src_rq->nr_running -= nr_migrated; ++#ifdef CONFIG_SMP ++ if (src_rq->nr_running < 2) ++ cpumask_clear_cpu(i, &sched_rq_pending_mask); ++#endif ++ rq->nr_running += nr_migrated; ++#ifdef CONFIG_SMP ++ if (rq->nr_running > 1) ++ cpumask_set_cpu(cpu, &sched_rq_pending_mask); ++#endif ++ update_sched_rq_watermark(rq); ++ cpufreq_update_util(rq, 0); ++ ++ spin_release(&src_rq->lock.dep_map, _RET_IP_); ++ do_raw_spin_unlock(&src_rq->lock); ++ ++ return 1; ++ } ++ ++ spin_release(&src_rq->lock.dep_map, _RET_IP_); ++ do_raw_spin_unlock(&src_rq->lock); ++ } ++ } while (++affinity_mask < end_mask); ++ ++ return 0; ++} ++#endif ++ ++/* ++ * Timeslices below RESCHED_NS are considered as good as expired as there's no ++ * point rescheduling when there's so little time left. ++ */ ++static inline void check_curr(struct task_struct *p, struct rq *rq) ++{ ++ if (unlikely(rq->idle == p)) ++ return; ++ ++ update_curr(rq, p); ++ ++ if (p->time_slice < RESCHED_NS) ++ time_slice_expired(p, rq); ++} ++ ++static inline struct task_struct * ++choose_next_task(struct rq *rq, int cpu, struct task_struct *prev) ++{ ++ struct task_struct *next; ++ ++ if (unlikely(rq->skip)) { ++ next = rq_runnable_task(rq); ++ if (next == rq->idle) { ++#ifdef CONFIG_SMP ++ if (!take_other_rq_tasks(rq, cpu)) { ++#endif ++ rq->skip = NULL; ++ schedstat_inc(rq->sched_goidle); ++ return next; ++#ifdef CONFIG_SMP ++ } ++ next = rq_runnable_task(rq); ++#endif ++ } ++ rq->skip = NULL; ++#ifdef CONFIG_HIGH_RES_TIMERS ++ hrtick_start(rq, next->time_slice); ++#endif ++ return next; ++ } ++ ++ next = sched_rq_first_task(rq); ++ if (next == rq->idle) { ++#ifdef CONFIG_SMP ++ if (!take_other_rq_tasks(rq, cpu)) { ++#endif ++ schedstat_inc(rq->sched_goidle); ++ /*printk(KERN_INFO "sched: choose_next_task(%d) idle %px\n", cpu, next);*/ ++ return next; ++#ifdef CONFIG_SMP ++ } ++ next = sched_rq_first_task(rq); ++#endif ++ } ++#ifdef CONFIG_HIGH_RES_TIMERS ++ hrtick_start(rq, next->time_slice); ++#endif ++ /*printk(KERN_INFO "sched: choose_next_task(%d) next %px\n", cpu, ++ * next);*/ ++ return next; +} + +/* @@ -4492,7 +4484,7 @@ index 000000000000..02d7d5a67c77 + * - in IRQ context, return from interrupt-handler to + * preemptible context + * -+ * - If the kernel is not preemptible (CONFIG_PREEMPT is not set) ++ * - If the kernel is not preemptible (CONFIG_PREEMPTION is not set) + * then at the next: + * + * - cond_resched() call @@ -4506,6 +4498,7 @@ index 000000000000..02d7d5a67c77 +{ + struct task_struct *prev, *next; + unsigned long *switch_count; ++ unsigned long prev_state; + struct rq *rq; + int cpu; + @@ -4515,7 +4508,7 @@ index 000000000000..02d7d5a67c77 + + schedule_debug(prev, preempt); + -+ /* by passing sched_feat(HRTICK) checking which PDS doesn't support */ ++ /* by passing sched_feat(HRTICK) checking which Alt schedule FW doesn't support */ + hrtick_clear(rq); + + local_irq_disable(); @@ -4524,9 +4517,16 @@ index 000000000000..02d7d5a67c77 + /* + * Make sure that signal_pending_state()->signal_pending() below + * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) -+ * done by the caller to avoid the race with signal_wake_up(). 
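`take_other_rq_tasks()` / `migrate_pending_tasks()` above pull at most `min(nr_running / 2, SCHED_RQ_NR_MIGRATION)` tasks whose affinity allows the destination CPU from a busier sibling queue. A toy version of that bounded, affinity-filtered steal over plain arrays (invented task structs, one steal attempt, no locking):

```c
#include <stdint.h>
#include <stdio.h>

#define SCHED_RQ_NR_MIGRATION 32u

struct toy_task { int id; uint64_t cpus_mask; };   /* bit i set == may run on CPU i */

/* steal from src[] into dst[]; returns how many tasks moved */
static unsigned steal_tasks(struct toy_task *src, unsigned *src_n,
                            struct toy_task *dst, unsigned *dst_n, int dst_cpu)
{
    unsigned tries = *src_n / 2;
    unsigned moved = 0, i = 0;

    if (tries > SCHED_RQ_NR_MIGRATION)
        tries = SCHED_RQ_NR_MIGRATION;

    while (tries-- && i < *src_n) {
        if (src[i].cpus_mask & (1ULL << dst_cpu)) {
            dst[(*dst_n)++] = src[i];              /* "enqueue" on the idle CPU */
            src[i] = src[--(*src_n)];              /* drop from the busy queue  */
            moved++;
        } else {
            i++;                                   /* affinity forbids it: skip */
        }
    }
    return moved;
}

int main(void)
{
    struct toy_task busy[4] = { {1, 0x1}, {2, 0x3}, {3, 0x2}, {4, 0x3} };
    struct toy_task idle[4];
    unsigned busy_n = 4, idle_n = 0;

    unsigned moved = steal_tasks(busy, &busy_n, idle, &idle_n, 1);  /* CPU 1 is the thief */
    printf("moved %u task(s); busy queue now has %u\n", moved, busy_n);
    return 0;
}
```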
++ * done by the caller to avoid the race with signal_wake_up(): + * -+ * The membarrier system call requires a full memory barrier ++ * __set_current_state(@state) signal_wake_up() ++ * schedule() set_tsk_thread_flag(p, TIF_SIGPENDING) ++ * wake_up_state(p, state) ++ * LOCK rq->lock LOCK p->pi_state ++ * smp_mb__after_spinlock() smp_mb__after_spinlock() ++ * if (signal_pending_state()) if (p->state & @state) ++ * ++ * Also, the membarrier system call requires a full memory barrier + * after coming from user-space, before storing to rq->curr. + */ + raw_spin_lock(&rq->lock); @@ -4535,10 +4535,38 @@ index 000000000000..02d7d5a67c77 + update_rq_clock(rq); + + switch_count = &prev->nivcsw; -+ if (!preempt && prev->state) { -+ if (signal_pending_state(prev->state, prev)) { ++ /* ++ * We must load prev->state once (task_struct::state is volatile), such ++ * that: ++ * ++ * - we form a control dependency vs deactivate_task() below. ++ * - ptrace_{,un}freeze_traced() can change ->state underneath us. ++ */ ++ prev_state = prev->state; ++ if (!preempt && prev_state && prev_state == prev->state) { ++ if (signal_pending_state(prev_state, prev)) { + prev->state = TASK_RUNNING; + } else { ++ prev->sched_contributes_to_load = ++ (prev_state & TASK_UNINTERRUPTIBLE) && ++ !(prev_state & TASK_NOLOAD) && ++ !(prev->flags & PF_FROZEN); ++ ++ if (prev->sched_contributes_to_load) ++ rq->nr_uninterruptible++; ++ ++ /* ++ * __schedule() ttwu() ++ * prev_state = prev->state; if (p->on_rq && ...) ++ * if (prev_state) goto out; ++ * p->on_rq = 0; smp_acquire__after_ctrl_dep(); ++ * p->state = TASK_WAKING ++ * ++ * Where __schedule() and ttwu() have matching control dependencies. ++ * ++ * After this, schedule() must not care about p->state any more. ++ */ ++ sched_task_deactivate(prev, rq); + deactivate_task(prev, rq); + + if (prev->in_iowait) { @@ -4549,19 +4577,18 @@ index 000000000000..02d7d5a67c77 + switch_count = &prev->nvcsw; + } + ++ check_curr(prev, rq); ++ ++ next = choose_next_task(rq, cpu, prev); + clear_tsk_need_resched(prev); + clear_preempt_need_resched(); + -+ check_deadline(prev, rq); + -+ next = choose_next_task(rq, cpu, prev); -+ -+ set_rq_task(rq, next); -+ -+ if (prev != next) { -+ if (next->prio == PRIO_LIMIT) -+ schedstat_inc(rq->sched_goidle); ++ if (likely(prev != next)) { ++ next->last_ran = rq->clock_task; ++ rq->last_ts_switch = rq->clock; + ++ rq->nr_switches++; + /* + * RCU users of rcu_dereference(rq->curr) may not see + * changes to task_struct made by pick_next_task(). 
@@ -4582,7 +4609,6 @@ index 000000000000..02d7d5a67c77 + * is a RELEASE barrier), + */ + ++*switch_count; -+ rq->nr_switches++; + + psi_sched_switch(prev, next, !task_on_rq_queued(prev)); + @@ -4590,11 +4616,12 @@ index 000000000000..02d7d5a67c77 + + /* Also unlocks the rq: */ + rq = context_switch(rq, prev, next); -+#ifdef CONFIG_SCHED_SMT -+ pds_sg_balance_check(rq); -+#endif + } else + raw_spin_unlock_irq(&rq->lock); ++ ++#ifdef CONFIG_SCHED_SMT ++ sg_balance_check(rq); ++#endif +} + +void __noreturn do_task_dead(void) @@ -4604,8 +4631,8 @@ index 000000000000..02d7d5a67c77 + + /* Tell freezer to ignore us: */ + current->flags |= PF_NOFREEZE; -+ __schedule(false); + ++ __schedule(false); + BUG(); + + /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ @@ -4615,8 +4642,7 @@ index 000000000000..02d7d5a67c77 + +static inline void sched_submit_work(struct task_struct *tsk) +{ -+ if (!tsk->state || tsk_is_pi_blocked(tsk) || -+ signal_pending_state(tsk->state, tsk)) ++ if (!tsk->state) + return; + + /* @@ -4624,7 +4650,8 @@ index 000000000000..02d7d5a67c77 + * it wants to wake up a task to maintain concurrency. + * As this function is called inside the schedule() context, + * we disable preemption to avoid it calling schedule() again -+ * in the possible wakeup of a kworker. ++ * in the possible wakeup of a kworker and because wq_worker_sleeping() ++ * requires it. + */ + if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { + preempt_disable(); @@ -4635,6 +4662,9 @@ index 000000000000..02d7d5a67c77 + preempt_enable_no_resched(); + } + ++ if (tsk_is_pi_blocked(tsk)) ++ return; ++ + /* + * If we are going to sleep and we have plugged IO queued, + * make sure to submit it to avoid deadlocks. @@ -4663,7 +4693,7 @@ index 000000000000..02d7d5a67c77 + __schedule(false); + sched_preempt_enable_no_resched(); + } while (need_resched()); -+ sched_update_worker(tsk); ++ sched_update_worker(tsk); +} +EXPORT_SYMBOL(schedule); + @@ -4854,25 +4884,17 @@ index 000000000000..02d7d5a67c77 +int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, + void *key) +{ ++ WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~WF_SYNC); + return try_to_wake_up(curr->private, mode, wake_flags); +} +EXPORT_SYMBOL(default_wake_function); + -+static inline void -+check_task_changed(struct rq *rq, struct task_struct *p) ++static inline void check_task_changed(struct rq *rq, struct task_struct *p) +{ -+ /* -+ * Trigger changes when task priority/deadline modified. -+ */ -+ if (task_on_rq_queued(p)) { -+ struct task_struct *first; -+ ++ /* Trigger resched if task sched_prio has been modified. 
*/ ++ if (task_on_rq_queued(p) && sched_task_need_requeue(p, rq)) { + requeue_task(p, rq); -+ -+ /* Resched if first queued task not running and not IDLE */ -+ if ((first = rq_first_queued_task(rq)) != rq->curr && -+ !task_running_idle(first)) -+ resched_curr(rq); ++ check_preempt_curr(rq); + } +} + @@ -4961,7 +4983,6 @@ index 000000000000..02d7d5a67c77 + update_task_priodl(p); + + check_task_changed(rq, p); -+ +out_unlock: + __task_access_unlock(p, lock); +} @@ -4974,14 +4995,12 @@ index 000000000000..02d7d5a67c77 + +void set_user_nice(struct task_struct *p, long nice) +{ -+ int new_static; + unsigned long flags; + struct rq *rq; + raw_spinlock_t *lock; + + if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) + return; -+ new_static = NICE_TO_PRIO(nice); + /* + * We have to be careful, if called from sys_setpriority(), + * the task might be in the middle of scheduling on another CPU. @@ -4989,10 +5008,7 @@ index 000000000000..02d7d5a67c77 + raw_spin_lock_irqsave(&p->pi_lock, flags); + rq = __task_access_lock(p, &lock); + -+ /* rq lock may not held!! */ -+ update_rq_clock(rq); -+ -+ p->static_prio = new_static; ++ p->static_prio = NICE_TO_PRIO(nice); + /* + * The RT priorities are set via sched_setscheduler(), but we still + * allow the 'normal' nice value to be set - but as expected @@ -5002,8 +5018,6 @@ index 000000000000..02d7d5a67c77 + if (task_has_rt_policy(p)) + goto out_unlock; + -+ p->deadline -= task_deadline_diff(p); -+ p->deadline += static_deadline_diff(new_static); + p->prio = effective_prio(p); + update_task_priodl(p); + @@ -5065,33 +5079,6 @@ index 000000000000..02d7d5a67c77 +#endif + +/** -+ * task_prio - return the priority value of a given task. -+ * @p: the task in question. -+ * -+ * Return: The priority value as seen by users in /proc. -+ * RT tasks are offset by -100. Normal tasks are centered around 1, value goes -+ * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). -+ */ -+int task_prio(const struct task_struct *p) -+{ -+ int level, prio = p->prio - MAX_RT_PRIO; -+ static const int level_to_nice_prio[] = {39, 33, 26, 20, 14, 7, 0, 0}; -+ -+ /* rt tasks */ -+ if (prio <= 0) -+ goto out; -+ -+ preempt_disable(); -+ level = task_deadline_level(p, this_rq()); -+ preempt_enable(); -+ prio += level_to_nice_prio[level]; -+ if (idleprio_task(p)) -+ prio += NICE_WIDTH; -+out: -+ return prio; -+} -+ -+/** + * idle_cpu - is a given CPU idle currently? + * @cpu: the processor in question. + * @@ -5099,7 +5086,20 @@ index 000000000000..02d7d5a67c77 + */ +int idle_cpu(int cpu) +{ -+ return cpu_curr(cpu) == cpu_rq(cpu)->idle; ++ struct rq *rq = cpu_rq(cpu); ++ ++ if (rq->curr != rq->idle) ++ return 0; ++ ++ if (rq->nr_running) ++ return 0; ++ ++#ifdef CONFIG_SMP ++ if (rq->ttwu_pending) ++ return 0; ++#endif ++ ++ return 1; +} + +/** @@ -5124,154 +5124,6 @@ index 000000000000..02d7d5a67c77 + return pid ? find_task_by_vpid(pid) : current; +} + -+#ifdef CONFIG_SMP -+void sched_set_stop_task(int cpu, struct task_struct *stop) -+{ -+ struct sched_param stop_param = { .sched_priority = STOP_PRIO }; -+ struct sched_param start_param = { .sched_priority = 0 }; -+ struct task_struct *old_stop = cpu_rq(cpu)->stop; -+ -+ if (stop) { -+ /* -+ * Make it appear like a SCHED_FIFO task, its something -+ * userspace knows about and won't get confused about. -+ * -+ * Also, it will make PI more or less work without too -+ * much confusion -- but then, stop work should not -+ * rely on PI working anyway. 
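`set_user_nice()` above is the kernel end of `nice(2)` / `setpriority(2)`: it recomputes `static_prio` and the effective priority and requeues the task if needed. A small userspace counterpart that raises the caller's nice value and reads it back (standard API, unrelated to the patch internals):

```c
#include <sys/resource.h>
#include <stdio.h>
#include <errno.h>

int main(void)
{
    /* raise our nice value (lower priority); unprivileged tasks may only go up */
    if (setpriority(PRIO_PROCESS, 0, 10) != 0) {
        perror("setpriority");
        return 1;
    }

    errno = 0;                                     /* -1 is a legal nice value */
    int nice_val = getpriority(PRIO_PROCESS, 0);
    if (nice_val == -1 && errno != 0) {
        perror("getpriority");
        return 1;
    }

    printf("nice value is now %d\n", nice_val);
    return 0;
}
```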
-+ */ -+ sched_setscheduler_nocheck(stop, SCHED_FIFO, &stop_param); -+ } -+ -+ cpu_rq(cpu)->stop = stop; -+ -+ if (old_stop) { -+ /* -+ * Reset it back to a normal scheduling policy so that -+ * it can die in pieces. -+ */ -+ sched_setscheduler_nocheck(old_stop, SCHED_NORMAL, &start_param); -+ } -+} -+ -+/* -+ * Change a given task's CPU affinity. Migrate the thread to a -+ * proper CPU and schedule it away if the CPU it's executing on -+ * is removed from the allowed bitmask. -+ * -+ * NOTE: the caller must have a valid reference to the task, the -+ * task must not exit() & deallocate itself prematurely. The -+ * call is not atomic; no spinlocks may be held. -+ */ -+static int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ const struct cpumask *cpu_valid_mask = cpu_active_mask; -+ int dest_cpu; -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ int ret = 0; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ rq = __task_access_lock(p, &lock); -+ -+ if (p->flags & PF_KTHREAD) { -+ /* -+ * Kernel threads are allowed on online && !active CPUs -+ */ -+ cpu_valid_mask = cpu_online_mask; -+ } -+ -+ /* -+ * Must re-check here, to close a race against __kthread_bind(), -+ * sched_setaffinity() is not guaranteed to observe the flag. -+ */ -+ if (check && (p->flags & PF_NO_SETAFFINITY)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ if (cpumask_equal(&p->cpus_mask, new_mask)) -+ goto out; -+ -+ dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); -+ if (dest_cpu >= nr_cpu_ids) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ do_set_cpus_allowed(p, new_mask); -+ -+ if (p->flags & PF_KTHREAD) { -+ /* -+ * For kernel threads that do indeed end up on online && -+ * !active we want to ensure they are strict per-CPU threads. -+ */ -+ WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && -+ !cpumask_intersects(new_mask, cpu_active_mask) && -+ p->nr_cpus_allowed != 1); -+ } -+ -+ /* Can the task run on the task's current CPU? If so, we're done */ -+ if (cpumask_test_cpu(task_cpu(p), new_mask)) -+ goto out; -+ -+ if (task_running(p) || p->state == TASK_WAKING) { -+ struct migration_arg arg = { p, dest_cpu }; -+ -+ /* Need help from migration thread: drop lock and wait. */ -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); -+ return 0; -+ } -+ if (task_on_rq_queued(p)) { -+ /* -+ * OK, since we're going to drop the lock immediately -+ * afterwards anyway. 
-+ */ -+ update_rq_clock(rq); -+ rq = move_queued_task(rq, p, dest_cpu); -+ lock = &rq->lock; -+ } -+ -+out: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+ return ret; -+} -+ -+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ return __set_cpus_allowed_ptr(p, new_mask, false); -+} -+EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); -+ -+#else -+static inline int -+__set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ return set_cpus_allowed_ptr(p, new_mask); -+} -+#endif -+ -+static u64 task_init_deadline(const struct task_struct *p) -+{ -+ return task_rq(p)->clock + task_deadline_diff(p); -+} -+ -+u64 (* task_init_deadline_func_tbl[])(const struct task_struct *p) = { -+ task_init_deadline, /* SCHED_NORMAL */ -+ NULL, /* SCHED_FIFO */ -+ NULL, /* SCHED_RR */ -+ task_init_deadline, /* SCHED_BATCH */ -+ NULL, /* SCHED_ISO */ -+ task_init_deadline /* SCHED_IDLE */ -+}; -+ +/* + * sched_setparam() passes in -1 for its policy, to let the functions + * it calls know not to change it. @@ -5281,7 +5133,6 @@ index 000000000000..02d7d5a67c77 +static void __setscheduler_params(struct task_struct *p, + const struct sched_attr *attr) +{ -+ int old_policy = p->policy; + int policy = attr->sched_policy; + + if (policy == SETPARAM_POLICY) @@ -5303,10 +5154,6 @@ index 000000000000..02d7d5a67c77 + */ + p->rt_priority = attr->sched_priority; + p->normal_prio = normal_prio(p); -+ -+ if (old_policy != policy) -+ p->deadline = (task_init_deadline_func_tbl[p->policy])? -+ task_init_deadline_func_tbl[p->policy](p):0ULL; +} + +/* Actually do priority change: must hold rq lock. */ @@ -5341,9 +5188,9 @@ index 000000000000..02d7d5a67c77 + return match; +} + -+static int -+__sched_setscheduler(struct task_struct *p, -+ const struct sched_attr *attr, bool user, bool pi) ++static int __sched_setscheduler(struct task_struct *p, ++ const struct sched_attr *attr, ++ bool user, bool pi) +{ + const struct sched_attr dl_squash_attr = { + .size = sizeof(struct sched_attr), @@ -5363,7 +5210,7 @@ index 000000000000..02d7d5a67c77 + BUG_ON(pi && in_interrupt()); + + /* -+ * PDS supports SCHED_DEADLINE by squash it as prio 0 SCHED_FIFO ++ * Alt schedule FW supports SCHED_DEADLINE by squash it as prio 0 SCHED_FIFO + */ + if (unlikely(SCHED_DEADLINE == policy)) { + attr = &dl_squash_attr; @@ -5552,6 +5399,8 @@ index 000000000000..02d7d5a67c77 + * @policy: new policy. + * @param: structure containing the new RT priority. + * ++ * Use sched_set_fifo(), read its comment. ++ * + * Return: 0 on success. An error code otherwise. + * + * NOTE that the task may be already dead. @@ -5562,13 +5411,10 @@ index 000000000000..02d7d5a67c77 + return _sched_setscheduler(p, policy, param, true); +} + -+EXPORT_SYMBOL_GPL(sched_setscheduler); -+ +int sched_setattr(struct task_struct *p, const struct sched_attr *attr) +{ + return __sched_setscheduler(p, attr, true, true); +} -+EXPORT_SYMBOL_GPL(sched_setattr); + +int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) +{ @@ -5593,7 +5439,51 @@ index 000000000000..02d7d5a67c77 +{ + return _sched_setscheduler(p, policy, param, false); +} -+EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck); ++ ++/* ++ * SCHED_FIFO is a broken scheduler model; that is, it is fundamentally ++ * incapable of resource management, which is the one thing an OS really should ++ * be doing. ++ * ++ * This is of course the reason it is limited to privileged users only. 
++ * ++ * Worse still; it is fundamentally impossible to compose static priority ++ * workloads. You cannot take two correctly working static prio workloads ++ * and smash them together and still expect them to work. ++ * ++ * For this reason 'all' FIFO tasks the kernel creates are basically at: ++ * ++ * MAX_RT_PRIO / 2 ++ * ++ * The administrator _MUST_ configure the system, the kernel simply doesn't ++ * know enough information to make a sensible choice. ++ */ ++void sched_set_fifo(struct task_struct *p) ++{ ++ struct sched_param sp = { .sched_priority = MAX_RT_PRIO / 2 }; ++ WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); ++} ++EXPORT_SYMBOL_GPL(sched_set_fifo); ++ ++/* ++ * For when you don't much care about FIFO, but want to be above SCHED_NORMAL. ++ */ ++void sched_set_fifo_low(struct task_struct *p) ++{ ++ struct sched_param sp = { .sched_priority = 1 }; ++ WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); ++} ++EXPORT_SYMBOL_GPL(sched_set_fifo_low); ++ ++void sched_set_normal(struct task_struct *p, int nice) ++{ ++ struct sched_attr attr = { ++ .sched_policy = SCHED_NORMAL, ++ .sched_nice = nice, ++ }; ++ WARN_ON_ONCE(sched_setattr_nocheck(p, &attr) != 0); ++} ++EXPORT_SYMBOL_GPL(sched_set_normal); + +static int +do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) @@ -5866,7 +5756,9 @@ index 000000000000..02d7d5a67c77 + goto out_unlock; + + kattr.sched_policy = p->policy; -+ if (rt_task(p)) ++ if (p->sched_reset_on_fork) ++ kattr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; ++ if (task_has_rt_policy(p)) + kattr.sched_priority = p->rt_priority; + else + kattr.sched_nice = task_nice(p); @@ -5887,7 +5779,7 @@ index 000000000000..02d7d5a67c77 + +long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) +{ -+ cpumask_var_t cpus_mask, new_mask; ++ cpumask_var_t cpus_allowed, new_mask; + struct task_struct *p; + int retval; + @@ -5909,7 +5801,7 @@ index 000000000000..02d7d5a67c77 + retval = -EINVAL; + goto out_put_task; + } -+ if (!alloc_cpumask_var(&cpus_mask, GFP_KERNEL)) { ++ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { + retval = -ENOMEM; + goto out_put_task; + } @@ -5931,27 +5823,27 @@ index 000000000000..02d7d5a67c77 + if (retval) + goto out_unlock; + -+ cpuset_cpus_allowed(p, cpus_mask); -+ cpumask_and(new_mask, in_mask, cpus_mask); ++ cpuset_cpus_allowed(p, cpus_allowed); ++ cpumask_and(new_mask, in_mask, cpus_allowed); +again: + retval = __set_cpus_allowed_ptr(p, new_mask, true); + + if (!retval) { -+ cpuset_cpus_allowed(p, cpus_mask); -+ if (!cpumask_subset(new_mask, cpus_mask)) { ++ cpuset_cpus_allowed(p, cpus_allowed); ++ if (!cpumask_subset(new_mask, cpus_allowed)) { + /* + * We must have raced with a concurrent cpuset -+ * update. Just reset the cpus_mask to the -+ * cpuset's cpus_mask ++ * update. 
Just reset the cpus_allowed to the ++ * cpuset's cpus_allowed + */ -+ cpumask_copy(new_mask, cpus_mask); ++ cpumask_copy(new_mask, cpus_allowed); + goto again; + } + } +out_unlock: + free_cpumask_var(new_mask); +out_free_cpus_allowed: -+ free_cpumask_var(cpus_mask); ++ free_cpumask_var(cpus_allowed); +out_put_task: + put_task_struct(p); + put_online_cpus(); @@ -6077,12 +5969,16 @@ index 000000000000..02d7d5a67c77 + + rq = this_rq_lock_irq(&rf); + -+ if (sched_yield_type > 1) { -+ time_slice_expired(current, rq); -+ requeue_task(current, rq); -+ } + schedstat_inc(rq->yld_count); + ++ if (1 == sched_yield_type) { ++ if (!rt_task(current)) ++ do_sched_yield_type_1(current, rq); ++ } else if (2 == sched_yield_type) { ++ if (rq->nr_running > 1) ++ rq->skip = current; ++ } ++ + /* + * Since we are going to call schedule() anyway, there's + * no need to preempt or enable interrupts: @@ -6180,7 +6076,7 @@ index 000000000000..02d7d5a67c77 + * It's the caller's job to ensure that the target task struct + * can't go away on us before we can do any checks. + * -+ * In PDS, yield_to is not supported. ++ * In Alt schedule FW, yield_to is not supported. + * + * Return: + * true (>0) if we indeed boosted the target task. @@ -6229,7 +6125,7 @@ index 000000000000..02d7d5a67c77 +} +EXPORT_SYMBOL(io_schedule_timeout); + -+void io_schedule(void) ++void __sched io_schedule(void) +{ + int token; + @@ -6258,7 +6154,6 @@ index 000000000000..02d7d5a67c77 + break; + case SCHED_NORMAL: + case SCHED_BATCH: -+ case SCHED_ISO: + case SCHED_IDLE: + ret = 0; + break; @@ -6285,7 +6180,6 @@ index 000000000000..02d7d5a67c77 + break; + case SCHED_NORMAL: + case SCHED_BATCH: -+ case SCHED_ISO: + case SCHED_IDLE: + ret = 0; + break; @@ -6298,6 +6192,8 @@ index 000000000000..02d7d5a67c77 + struct task_struct *p; + int retval; + ++ alt_sched_debug(); ++ + if (pid < 0) + return -EINVAL; + @@ -6312,7 +6208,7 @@ index 000000000000..02d7d5a67c77 + goto out_unlock; + rcu_read_unlock(); + -+ *t = ns_to_timespec64(MS_TO_NS(rr_interval)); ++ *t = ns_to_timespec64(sched_timeslice_ns); + return 0; + +out_unlock: @@ -6362,10 +6258,10 @@ index 000000000000..02d7d5a67c77 + if (!try_get_task_stack(p)) + return; + -+ printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p)); ++ pr_info("task:%-15.15s state:%c", p->comm, task_state_to_char(p)); + + if (p->state == TASK_RUNNING) -+ printk(KERN_CONT " running task "); ++ pr_cont(" running task "); +#ifdef CONFIG_DEBUG_STACK_USAGE + free = stack_not_used(p); +#endif @@ -6374,8 +6270,8 @@ index 000000000000..02d7d5a67c77 + if (pid_alive(p)) + ppid = task_pid_nr(rcu_dereference(p->real_parent)); + rcu_read_unlock(); -+ printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, -+ task_pid_nr(p), ppid, ++ pr_cont(" stack:%5lu pid:%5d ppid:%6d flags:0x%08lx\n", ++ free, task_pid_nr(p), ppid, + (unsigned long)task_thread_info(p)->flags); + + print_worker_info(KERN_INFO, p); @@ -6410,13 +6306,6 @@ index 000000000000..02d7d5a67c77 +{ + struct task_struct *g, *p; + -+#if BITS_PER_LONG == 32 -+ printk(KERN_INFO -+ " task PC stack pid father\n"); -+#else -+ printk(KERN_INFO -+ " task PC stack pid father\n"); -+#endif + rcu_read_lock(); + for_each_process_thread(g, p) { + /* @@ -6433,7 +6322,7 @@ index 000000000000..02d7d5a67c77 + } + +#ifdef CONFIG_SCHED_DEBUG -+ /* PDS TODO: should support this ++ /* TODO: Alt schedule FW should support this + if (!state_filter) + sysrq_sched_debug_show(); + */ @@ -6455,7 +6344,7 @@ index 000000000000..02d7d5a67c77 +/** + * init_idle - set up an idle thread for a given CPU + * 
@idle: task in question -+ * @cpu: cpu the idle task belongs to ++ * @cpu: CPU the idle task belongs to + * + * NOTE: this function does not set the idle thread's NEED_RESCHED + * flag, to make booting more robust. @@ -6465,6 +6354,8 @@ index 000000000000..02d7d5a67c77 + struct rq *rq = cpu_rq(cpu); + unsigned long flags; + ++ __sched_fork(0, idle); ++ + raw_spin_lock_irqsave(&idle->pi_lock, flags); + raw_spin_lock(&rq->lock); + update_rq_clock(rq); @@ -6472,11 +6363,9 @@ index 000000000000..02d7d5a67c77 + idle->last_ran = rq->clock_task; + idle->state = TASK_RUNNING; + idle->flags |= PF_IDLE; -+ /* Setting prio to illegal value shouldn't matter when never queued */ -+ idle->prio = PRIO_LIMIT; -+ idle->deadline = rq_clock(rq) + task_deadline_diff(idle); -+ update_task_priodl(idle); ++ sched_queue_init_idle(rq, idle); + ++ scs_task_reset(idle); + kasan_unpoison_task_stack(idle); + +#ifdef CONFIG_SMP @@ -6511,104 +6400,6 @@ index 000000000000..02d7d5a67c77 +#endif +} + -+void resched_cpu(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ if (cpu_online(cpu) || cpu == smp_processor_id()) -+ resched_curr(cpu_rq(cpu)); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+} -+ -+static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ struct wake_q_node *node = &task->wake_q; -+ -+ /* -+ * Atomically grab the task, if ->wake_q is !nil already it means -+ * its already queued (either by us or someone else) and will get the -+ * wakeup due to that. -+ * -+ * In order to ensure that a pending wakeup will observe our pending -+ * state, even in the failed case, an explicit smp_mb() must be used. -+ */ -+ smp_mb__before_atomic(); -+ if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))) -+ return false; -+ -+ /* -+ * The head is context local, there can be no concurrency. -+ */ -+ *head->lastp = node; -+ head->lastp = &node->next; -+ return true; -+} -+ -+/** -+ * wake_q_add() - queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ */ -+void wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (__wake_q_add(head, task)) -+ get_task_struct(task); -+} -+ -+/** -+ * wake_q_add_safe() - safely queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ * -+ * This function is essentially a task-safe equivalent to wake_q_add(). Callers -+ * that already hold reference to @task can call the 'safe' version and trust -+ * wake_q to do the right thing depending whether or not the @task is already -+ * queued for wakeup. 
-+ */ -+void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (!__wake_q_add(head, task)) -+ put_task_struct(task); -+} -+ -+void wake_up_q(struct wake_q_head *head) -+{ -+ struct wake_q_node *node = head->first; -+ -+ while (node != WAKE_Q_TAIL) { -+ struct task_struct *task; -+ -+ task = container_of(node, struct task_struct, wake_q); -+ BUG_ON(!task); -+ /* task can safely be re-inserted now: */ -+ node = node->next; -+ task->wake_q.next = NULL; -+ -+ /* -+ * wake_up_process() executes a full barrier, which pairs with -+ * the queueing in wake_q_add() so as not to miss wakeups. -+ */ -+ wake_up_process(task); -+ put_task_struct(task); -+ } -+} -+ +#ifdef CONFIG_SMP + +int cpuset_cpumask_can_shrink(const struct cpumask __maybe_unused *cur, @@ -6637,75 +6428,7 @@ index 000000000000..02d7d5a67c77 + return ret; +} + -+static bool sched_smp_initialized __read_mostly; -+ -+#ifdef CONFIG_NO_HZ_COMMON -+void nohz_balance_enter_idle(int cpu) -+{ -+} -+ -+void select_nohz_load_balancer(int stop_tick) -+{ -+} -+ -+void set_cpu_sd_state_idle(void) {} -+ -+/* -+ * In the semi idle case, use the nearest busy CPU for migrating timers -+ * from an idle CPU. This is good for power-savings. -+ * -+ * We don't do similar optimization for completely idle system, as -+ * selecting an idle CPU will add more delays to the timers than intended -+ * (as that CPU's timer base may not be uptodate wrt jiffies etc). -+ */ -+int get_nohz_timer_target(void) -+{ -+ int i, cpu = smp_processor_id(), default_cpu = -1; -+ struct cpumask *mask; -+ -+ if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) { -+ if (!idle_cpu(cpu)) -+ return cpu; -+ default_cpu = cpu; -+ } -+ -+ for (mask = &(per_cpu(sched_cpu_affinity_chk_masks, cpu)[0]); -+ mask < per_cpu(sched_cpu_affinity_chk_end_masks, cpu); mask++) -+ for_each_cpu_and(i, mask, housekeeping_cpumask(HK_FLAG_TIMER)) -+ if (!idle_cpu(i)) -+ return i; -+ -+ if (default_cpu == -1) -+ default_cpu = housekeeping_any_cpu(HK_FLAG_TIMER); -+ cpu = default_cpu; -+ -+ return cpu; -+} -+ -+/* -+ * When add_timer_on() enqueues a timer into the timer wheel of an -+ * idle CPU then this timer might expire before the next timer event -+ * which is scheduled to wake up that CPU. In case of a completely -+ * idle system the next event might even be infinite time into the -+ * future. wake_up_idle_cpu() ensures that the CPU is woken up and -+ * leaves the inner idle loop so the newly added timer is taken into -+ * account when the CPU goes back to idle and evaluates the timer -+ * wheel for the next timer event. 
-+ */ -+void wake_up_idle_cpu(int cpu) -+{ -+ if (cpu == smp_processor_id()) -+ return; -+ -+ set_tsk_need_resched(cpu_rq(cpu)->idle); -+ smp_send_reschedule(cpu); -+} -+ -+void wake_up_nohz_cpu(int cpu) -+{ -+ wake_up_idle_cpu(cpu); -+} -+#endif /* CONFIG_NO_HZ_COMMON */ ++bool sched_smp_initialized __read_mostly; + +#ifdef CONFIG_HOTPLUG_CPU +/* @@ -6738,7 +6461,6 @@ index 000000000000..02d7d5a67c77 +{ + struct rq *rq = dead_rq; + struct task_struct *p, *stop = rq->stop; -+ struct skiplist_node *node; + int count = 0; + + /* @@ -6752,18 +6474,18 @@ index 000000000000..02d7d5a67c77 + */ + rq->stop = NULL; + -+ node = &rq->sl_header; -+ while ((node = node->next[0]) != &rq->sl_header) { ++ p = sched_rq_first_task(rq); ++ while (p != rq->idle) { + int dest_cpu; + -+ p = skiplist_entry(node, struct task_struct, sl_node); -+ + /* skip the running task */ -+ if (task_running(p)) ++ if (task_running(p) || 1 == p->nr_cpus_allowed) { ++ p = sched_rq_next_task(p, rq); + continue; ++ } + + /* -+ * Rules for changing task_struct::cpus_mask are holding ++ * Rules for changing task_struct::cpus_allowed are holding + * both pi_lock and rq->lock, such that holding either + * stabilizes the mask. + * @@ -6782,13 +6504,13 @@ index 000000000000..02d7d5a67c77 + */ + if (WARN_ON(task_rq(p) != rq || !task_on_rq_queued(p))) { + raw_spin_unlock(&p->pi_lock); ++ p = sched_rq_next_task(p, rq); + continue; + } + + count++; + /* Find suitable destination for @next, with force if needed. */ + dest_cpu = select_fallback_rq(dead_rq->cpu, p); -+ + rq = __migrate_task(rq, p, dest_cpu); + raw_spin_unlock(&rq->lock); + raw_spin_unlock(&p->pi_lock); @@ -6796,7 +6518,7 @@ index 000000000000..02d7d5a67c77 + rq = dead_rq; + raw_spin_lock(&rq->lock); + /* Check queued task all over from the header again */ -+ node = &rq->sl_header; ++ p = sched_rq_first_task(rq); + } + + rq->stop = stop; @@ -6815,187 +6537,11 @@ index 000000000000..02d7d5a67c77 + rq->online = true; +} + -+#ifdef CONFIG_SCHED_DEBUG -+ -+static __read_mostly int sched_debug_enabled; -+ -+static int __init sched_debug_setup(char *str) -+{ -+ sched_debug_enabled = 1; -+ -+ return 0; -+} -+early_param("sched_debug", sched_debug_setup); -+ -+static inline bool sched_debug(void) -+{ -+ return sched_debug_enabled; -+} -+#else /* !CONFIG_SCHED_DEBUG */ -+static inline bool sched_debug(void) -+{ -+ return false; -+} -+#endif /* CONFIG_SCHED_DEBUG */ -+ -+#ifdef CONFIG_SMP -+void send_call_function_single_ipi(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (!set_nr_if_polling(rq->idle)) -+ arch_send_call_function_single_ipi(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+void sched_ttwu_pending(void *arg) -+{ -+ struct llist_node *llist = arg; -+ struct rq *rq = this_rq(); -+ struct task_struct *p, *t; -+ struct rq_flags rf; -+ -+ if (!llist) -+ return; -+ -+ /* -+ * rq::ttwu_pending racy indication of out-standing wakeups. -+ * Races such that false-negatives are possible, since they -+ * are shorter lived that false-positives would be. -+ */ -+ WRITE_ONCE(rq->ttwu_pending, 0); -+ -+ rq_lock_irqsave(rq, &rf); -+ update_rq_clock(rq); -+ -+ /*llist_for_each_entry_safe(p, t, llist, wake_entry) -+ ttwu_do_activate(rq, p, p->sched_remote_wakeup ? 
WF_MIGRATED : 0, &rf);*/ -+ -+ rq_unlock_irqrestore(rq, &rf); -+} -+ -+void wake_up_if_idle(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ rcu_read_lock(); -+ -+ if (!is_idle_task(rcu_dereference(rq->curr))) -+ goto out; -+ -+ if (set_nr_if_polling(rq->idle)) { -+ trace_sched_wake_idle_without_ipi(cpu); -+ } else { -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ if (is_idle_task(rq->curr)) -+ smp_send_reschedule(cpu); -+ /* Else CPU is not idle, do nothing here */ -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ } -+ -+out: -+ rcu_read_unlock(); -+} -+ -+bool cpus_share_cache(int this_cpu, int that_cpu) -+{ -+ return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); -+} -+#endif /* CONFIG_SMP */ -+ -+/* -+ * Topology list, bottom-up. -+ */ -+static struct sched_domain_topology_level default_topology[] = { -+#ifdef CONFIG_SCHED_SMT -+ { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, -+#endif -+#ifdef CONFIG_SCHED_MC -+ { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, -+#endif -+ { cpu_cpu_mask, SD_INIT_NAME(DIE) }, -+ { NULL, }, -+}; -+ -+static struct sched_domain_topology_level *sched_domain_topology = -+ default_topology; -+ -+#define for_each_sd_topology(tl) \ -+ for (tl = sched_domain_topology; tl->mask; tl++) -+ -+void set_sched_topology(struct sched_domain_topology_level *tl) -+{ -+ if (WARN_ON_ONCE(sched_smp_initialized)) -+ return; -+ -+ sched_domain_topology = tl; -+} -+ -+/* -+ * Initializers for schedule domains -+ * Non-inlined to reduce accumulated stack pressure in build_sched_domains() -+ */ -+ -+int sched_domain_level_max; -+ -+/* -+ * Partition sched domains as specified by the 'ndoms_new' -+ * cpumasks in the array doms_new[] of cpumasks. This compares -+ * doms_new[] to the current sched domain partitioning, doms_cur[]. -+ * It destroys each deleted domain and builds each new domain. -+ * -+ * 'doms_new' is an array of cpumask_var_t's of length 'ndoms_new'. -+ * The masks don't intersect (don't overlap.) We should setup one -+ * sched domain for each mask. CPUs not in any of the cpumasks will -+ * not be load balanced. If the same cpumask appears both in the -+ * current 'doms_cur' domains and in the new 'doms_new', we can leave -+ * it as it is. -+ * -+ * The passed in 'doms_new' should be allocated using -+ * alloc_sched_domains. This routine takes ownership of it and will -+ * free_sched_domains it when done with it. If the caller failed the -+ * alloc call, then it can pass in doms_new == NULL && ndoms_new == 1, -+ * and partition_sched_domains() will fallback to the single partition -+ * 'fallback_doms', it also forces the domains to be rebuilt. -+ * -+ * If doms_new == NULL it will be replaced with cpu_online_mask. -+ * ndoms_new == 0 is a special case for destroying existing domains, -+ * and it will not create the default domain. -+ * -+ * Call with hotplug lock held -+ */ -+void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], -+ struct sched_domain_attr *dattr_new) -+{ -+ /** -+ * PDS doesn't depend on sched domains, but just keep this api -+ */ -+} -+ +/* + * used to mark begin/end of suspend/resume: + */ +static int num_cpus_frozen; + -+#ifdef CONFIG_NUMA -+int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; -+ -+/* -+ * sched_numa_find_closest() - given the NUMA topology, find the cpu -+ * closest to @cpu from @cpumask. -+ * cpumask: cpumask to find a cpu from -+ * cpu: cpu to be close to -+ * -+ * returns: cpu, or nr_cpu_ids when nothing found. 
-+ */ -+int sched_numa_find_closest(const struct cpumask *cpus, int cpu) -+{ -+ return best_mask_cpu(cpu, cpus); -+} -+#endif /* CONFIG_NUMA */ -+ +/* + * Update cpusets according to cpu_active mask. If cpusets are + * disabled, cpuset_update_active_cpus() becomes a simple wrapper @@ -7089,8 +6635,11 @@ index 000000000000..02d7d5a67c77 + /* + * When going down, decrement the number of cores with SMT present. + */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) ++ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) { + static_branch_dec_cpuslocked(&sched_smt_present); ++ if (!static_branch_likely(&sched_smt_present)) ++ cpumask_clear(&sched_sg_idle_mask); ++ } +#endif + + if (!sched_smp_initialized) @@ -7124,7 +6673,9 @@ index 000000000000..02d7d5a67c77 + struct rq *rq = cpu_rq(cpu); + unsigned long flags; + ++ /* Handle pending wakeups and then migrate everything off */ + sched_tick_stop(cpu); ++ + raw_spin_lock_irqsave(&rq->lock, flags); + set_rq_offline(rq); + migrate_tasks(rq); @@ -7143,68 +6694,53 @@ index 000000000000..02d7d5a67c77 + + for_each_possible_cpu(cpu) { + for (level = 0; level < NR_CPU_AFFINITY_CHK_LEVEL; level++) { -+ tmp = &(per_cpu(sched_cpu_affinity_chk_masks, cpu)[level]); ++ tmp = &(per_cpu(sched_cpu_affinity_masks, cpu)[level]); + cpumask_copy(tmp, cpu_possible_mask); + cpumask_clear_cpu(cpu, tmp); + } -+ per_cpu(sched_cpu_llc_start_mask, cpu) = -+ &(per_cpu(sched_cpu_affinity_chk_masks, cpu)[0]); -+ per_cpu(sched_cpu_affinity_chk_end_masks, cpu) = -+ &(per_cpu(sched_cpu_affinity_chk_masks, cpu)[1]); ++ per_cpu(sched_cpu_llc_mask, cpu) = ++ &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); ++ per_cpu(sched_cpu_affinity_end_mask, cpu) = ++ &(per_cpu(sched_cpu_affinity_masks, cpu)[1]); ++ /*per_cpu(sd_llc_id, cpu) = cpu;*/ + } +} + ++#define TOPOLOGY_CPUMASK(name, mask, last) \ ++ if (cpumask_and(chk, chk, mask)) \ ++ printk(KERN_INFO "sched: cpu#%02d affinity mask: 0x%08lx - "#name,\ ++ cpu, (chk++)->bits[0]); \ ++ if (!last) \ ++ cpumask_complement(chk, mask) ++ +static void sched_init_topology_cpumask(void) +{ + int cpu; + cpumask_t *chk; + + for_each_online_cpu(cpu) { -+ chk = &(per_cpu(sched_cpu_affinity_chk_masks, cpu)[0]); ++ /* take chance to reset time slice for idle tasks */ ++ cpu_rq(cpu)->idle->time_slice = sched_timeslice_ns; + ++ chk = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); ++ ++ cpumask_complement(chk, cpumask_of(cpu)); +#ifdef CONFIG_SCHED_SMT -+ cpumask_setall(chk); -+ cpumask_clear_cpu(cpu, chk); -+ if (cpumask_and(chk, chk, topology_sibling_cpumask(cpu))) { -+ per_cpu(sched_sibling_cpu, cpu) = cpumask_first(chk); -+ printk(KERN_INFO "pds: cpu #%d affinity check mask - smt 0x%08lx", -+ cpu, (chk++)->bits[0]); -+ } ++ TOPOLOGY_CPUMASK(smt, topology_sibling_cpumask(cpu), false); +#endif -+#ifdef CONFIG_SCHED_MC -+ cpumask_setall(chk); -+ cpumask_clear_cpu(cpu, chk); -+ if (cpumask_and(chk, chk, cpu_coregroup_mask(cpu))) { -+ per_cpu(sched_cpu_llc_start_mask, cpu) = chk; -+ printk(KERN_INFO "pds: cpu #%d affinity check mask - coregroup 0x%08lx", -+ cpu, (chk++)->bits[0]); -+ } -+ cpumask_complement(chk, cpu_coregroup_mask(cpu)); ++ per_cpu(sd_llc_id, cpu) = cpumask_first(cpu_coregroup_mask(cpu)); ++ per_cpu(sched_cpu_llc_mask, cpu) = chk; ++ TOPOLOGY_CPUMASK(coregroup, cpu_coregroup_mask(cpu), false); + -+ /** -+ * Set up sd_llc_id per CPU -+ */ -+ per_cpu(sd_llc_id, cpu) = -+ cpumask_first(cpu_coregroup_mask(cpu)); -+#else -+ per_cpu(sd_llc_id, cpu) = -+ cpumask_first(topology_core_cpumask(cpu)); ++ TOPOLOGY_CPUMASK(core, topology_core_cpumask(cpu), false); + 
-+ per_cpu(sched_cpu_llc_start_mask, cpu) = chk; ++ TOPOLOGY_CPUMASK(others, cpu_online_mask, true); + -+ cpumask_setall(chk); -+ cpumask_clear_cpu(cpu, chk); -+#endif /* NOT CONFIG_SCHED_MC */ -+ if (cpumask_and(chk, chk, topology_core_cpumask(cpu))) -+ printk(KERN_INFO "pds: cpu #%d affinity check mask - core 0x%08lx", -+ cpu, (chk++)->bits[0]); -+ cpumask_complement(chk, topology_core_cpumask(cpu)); -+ -+ if (cpumask_and(chk, chk, cpu_online_mask)) -+ printk(KERN_INFO "pds: cpu #%d affinity check mask - others 0x%08lx", -+ cpu, (chk++)->bits[0]); -+ -+ per_cpu(sched_cpu_affinity_chk_end_masks, cpu) = chk; ++ per_cpu(sched_cpu_affinity_end_mask, cpu) = chk; ++ printk(KERN_INFO "sched: cpu#%02d llc_id = %d, llc_mask idx = %d\n", ++ cpu, per_cpu(sd_llc_id, cpu), ++ (int) (per_cpu(sched_cpu_llc_mask, cpu) - ++ &(per_cpu(sched_cpu_affinity_masks, cpu)[0]))); + } +} +#endif @@ -7215,8 +6751,6 @@ index 000000000000..02d7d5a67c77 + if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) + BUG(); + -+ cpumask_copy(&sched_rq_queued_masks[SCHED_RQ_EMPTY], cpu_online_mask); -+ + sched_init_topology_cpumask(); + + sched_smp_initialized = true; @@ -7224,6 +6758,7 @@ index 000000000000..02d7d5a67c77 +#else +void __init sched_init_smp(void) +{ ++ cpu_rq(0)->idle->time_slice = sched_timeslice_ns; +} +#endif /* CONFIG_SMP */ + @@ -7263,20 +6798,13 @@ index 000000000000..02d7d5a67c77 + int i; + struct rq *rq; + -+ print_scheduler_version(); ++ printk(KERN_INFO ALT_SCHED_VERSION_MSG); + + wait_bit_init(); + +#ifdef CONFIG_SMP -+ for (i = 0; i < NR_SCHED_RQ_QUEUED_LEVEL; i++) -+ cpumask_clear(&sched_rq_queued_masks[i]); -+ cpumask_setall(&sched_rq_queued_masks[SCHED_RQ_EMPTY]); -+ set_bit(SCHED_RQ_EMPTY, sched_rq_queued_masks_bitmap); -+ -+ cpumask_setall(&sched_rq_pending_masks[SCHED_RQ_EMPTY]); -+ set_bit(SCHED_RQ_EMPTY, sched_rq_pending_masks_bitmap); -+#else -+ uprq = &per_cpu(runqueues, 0); ++ for (i = 0; i < SCHED_BITS; i++) ++ cpumask_copy(&sched_rq_watermark[i], cpu_present_mask); +#endif + +#ifdef CONFIG_CGROUP_SCHED @@ -7288,9 +6816,12 @@ index 000000000000..02d7d5a67c77 +#endif /* CONFIG_CGROUP_SCHED */ + for_each_possible_cpu(i) { + rq = cpu_rq(i); -+ FULL_INIT_SKIPLIST_NODE(&rq->sl_header); ++ ++ sched_queue_init(rq); ++ rq->watermark = IDLE_WM; ++ rq->skip = NULL; ++ + raw_spin_lock_init(&rq->lock); -+ rq->dither = 0; + rq->nr_running = rq->nr_uninterruptible = 0; + rq->calc_load_active = 0; + rq->calc_load_update = jiffies + LOAD_FREQ; @@ -7298,22 +6829,23 @@ index 000000000000..02d7d5a67c77 + rq->online = false; + rq->cpu = i; + -+ rq->queued_level = SCHED_RQ_EMPTY; -+ rq->pending_level = SCHED_RQ_EMPTY; +#ifdef CONFIG_SCHED_SMT -+ per_cpu(sched_sibling_cpu, i) = i; + rq->active_balance = 0; +#endif ++ ++#ifdef CONFIG_NO_HZ_COMMON ++ rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func); +#endif ++#endif /* CONFIG_SMP */ + rq->nr_switches = 0; -+ atomic_set(&rq->nr_iowait, 0); ++ + hrtick_rq_init(rq); ++ atomic_set(&rq->nr_iowait, 0); + } +#ifdef CONFIG_SMP + /* Set rq->online for cpu 0 */ + cpu_rq(0)->online = true; +#endif -+ + /* + * The boot idle thread does lazy MMU switching as well: + */ @@ -7524,15 +7056,6 @@ index 000000000000..02d7d5a67c77 + +#endif + -+#ifdef CONFIG_SCHED_DEBUG -+void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, -+ struct seq_file *m) -+{} -+ -+void proc_sched_set_task(struct task_struct *p) -+{} -+#endif -+ +#ifdef CONFIG_CGROUP_SCHED +static void sched_free_group(struct task_group *tg) +{ @@ -7639,6 +7162,7 @@ index 
000000000000..02d7d5a67c77 + { } /* Terminate */ +}; + ++ +static struct cftype cpu_files[] = { + { } /* terminate */ +}; @@ -7667,14 +7191,51 @@ index 000000000000..02d7d5a67c77 +#endif /* CONFIG_CGROUP_SCHED */ + +#undef CREATE_TRACE_POINTS -diff --git a/kernel/sched/pds_sched.h b/kernel/sched/pds_sched.h +diff --git a/kernel/sched/alt_debug.c b/kernel/sched/alt_debug.c new file mode 100644 -index 000000000000..6c3361f06087 +index 000000000000..1212a031700e --- /dev/null -+++ b/kernel/sched/pds_sched.h -@@ -0,0 +1,577 @@ -+#ifndef PDS_SCHED_H -+#define PDS_SCHED_H ++++ b/kernel/sched/alt_debug.c +@@ -0,0 +1,31 @@ ++/* ++ * kernel/sched/alt_debug.c ++ * ++ * Print the alt scheduler debugging details ++ * ++ * Author: Alfred Chen ++ * Date : 2020 ++ */ ++#include "sched.h" ++ ++/* ++ * This allows printing both to /proc/sched_debug and ++ * to the console ++ */ ++#define SEQ_printf(m, x...) \ ++ do { \ ++ if (m) \ ++ seq_printf(m, x); \ ++ else \ ++ pr_cont(x); \ ++ } while (0) ++ ++void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, ++ struct seq_file *m) ++{ ++ SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns), ++ get_nr_threads(p)); ++} ++ ++void proc_sched_set_task(struct task_struct *p) ++{} +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +new file mode 100644 +index 000000000000..99be2c51c88d +--- /dev/null ++++ b/kernel/sched/alt_sched.h +@@ -0,0 +1,555 @@ ++#ifndef ALT_SCHED_H ++#define ALT_SCHED_H + +#include + @@ -7721,6 +7282,13 @@ index 000000000000..6c3361f06087 + +#include "cpupri.h" + ++#ifdef CONFIG_SCHED_BMQ ++#include "bmq.h" ++#endif ++#ifdef CONFIG_SCHED_PDS ++#include "pds.h" ++#endif ++ +/* task_struct::on_rq states: */ +#define TASK_ON_RQ_QUEUED 1 +#define TASK_ON_RQ_MIGRATING 2 @@ -7741,13 +7309,7 @@ index 000000000000..6c3361f06087 +#define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ +#define WF_FORK 0x02 /* child wakeup after fork */ +#define WF_MIGRATED 0x04 /* internal use, task got migrated */ -+ -+/* -+ * rq::clock_update_flags bits -+ */ -+#define RQCF_REQ_SKIP 0x01 -+#define RQCF_ACT_SKIP 0x02 -+#define RQCF_UPDATED 0x04 ++#define WF_ON_CPU 0x08 /* Wakee is on_rq */ + +/* + * This is the main, per-CPU runqueue data structure. 
@@ -7758,10 +7320,16 @@ index 000000000000..6c3361f06087 + raw_spinlock_t lock; + + struct task_struct __rcu *curr; -+ struct task_struct *idle, *stop; ++ struct task_struct *idle, *stop, *skip; + struct mm_struct *prev_mm; + ++#ifdef CONFIG_SCHED_BMQ ++ struct bmq queue; ++#endif ++#ifdef CONFIG_SCHED_PDS + struct skiplist_node sl_header; ++#endif ++ unsigned long watermark; + + /* switch count */ + u64 nr_switches; @@ -7775,18 +7343,14 @@ index 000000000000..6c3361f06087 +#ifdef CONFIG_SMP + int cpu; /* cpu of this runqueue */ + bool online; ++ + unsigned int ttwu_pending; -+ unsigned int clock_update_flags; ++ unsigned char nohz_idle_balance; ++ unsigned char idle_balance; + +#ifdef CONFIG_HAVE_SCHED_AVG_IRQ + struct sched_avg avg_irq; +#endif -+#ifdef CONFIG_SCHED_THERMAL_PRESSURE -+ struct sched_avg avg_thermal; -+#endif -+ -+ unsigned long queued_level; -+ unsigned long pending_level; + +#ifdef CONFIG_SCHED_SMT + int active_balance; @@ -7808,8 +7372,8 @@ index 000000000000..6c3361f06087 + long calc_load_active; + + u64 clock, last_tick; ++ u64 last_ts_switch; + u64 clock_task; -+ int dither; + + unsigned long nr_running; + unsigned long nr_uninterruptible; @@ -7840,15 +7404,19 @@ index 000000000000..6c3361f06087 + unsigned int ttwu_count; + unsigned int ttwu_local; +#endif /* CONFIG_SCHEDSTATS */ ++ +#ifdef CONFIG_CPU_IDLE + /* Must be inspected within a rcu lock section */ + struct cpuidle_state *idle_state; +#endif -+}; + -+#define task_contributes_to_load(task) ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ -+ (task->flags & PF_FROZEN) == 0 && \ -+ (task->state & TASK_NOLOAD) == 0) ++#ifdef CONFIG_NO_HZ_COMMON ++#ifdef CONFIG_SMP ++ call_single_data_t nohz_csd; ++#endif ++ atomic_t nohz_flags; ++#endif /* CONFIG_NO_HZ_COMMON */ ++}; + +extern unsigned long calc_load_update; +extern atomic_long_t calc_load_tasks; @@ -7856,21 +7424,14 @@ index 000000000000..6c3361f06087 +extern void calc_global_load_tick(struct rq *this_rq); +extern long calc_load_fold_active(struct rq *this_rq, long adjust); + -+#ifndef CONFIG_SMP -+extern struct rq *uprq; -+#define cpu_rq(cpu) (uprq) -+#define this_rq() (uprq) -+#define raw_rq() (uprq) -+#define task_rq(p) (uprq) -+#define cpu_curr(cpu) ((uprq)->curr) -+#else /* CONFIG_SMP */ +DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); +#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) +#define this_rq() this_cpu_ptr(&runqueues) -+#define raw_rq() raw_cpu_ptr(&runqueues) +#define task_rq(p) cpu_rq(task_cpu(p)) +#define cpu_curr(cpu) (cpu_rq(cpu)->curr) ++#define raw_rq() raw_cpu_ptr(&runqueues) + ++#ifdef CONFIG_SMP +#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) +void register_sched_domain_sysctl(void); +void unregister_sched_domain_sysctl(void); @@ -7883,7 +7444,40 @@ index 000000000000..6c3361f06087 +} +#endif + -+#endif /* CONFIG_SMP */ ++extern bool sched_smp_initialized; ++ ++enum { ++ BASE_CPU_AFFINITY_CHK_LEVEL = 1, ++#ifdef CONFIG_SCHED_SMT ++ SMT_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, ++#endif ++#ifdef CONFIG_SCHED_MC ++ MC_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, ++#endif ++ NR_CPU_AFFINITY_CHK_LEVEL ++}; ++ ++DECLARE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); ++ ++static inline int __best_mask_cpu(int cpu, const cpumask_t *cpumask, ++ const cpumask_t *mask) ++{ ++ while ((cpu = cpumask_any_and(cpumask, mask)) >= nr_cpu_ids) ++ mask++; ++ return cpu; ++} ++ ++static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask) ++{ ++ return cpumask_test_cpu(cpu, cpumask)? 
cpu : ++ __best_mask_cpu(cpu, cpumask, &(per_cpu(sched_cpu_affinity_masks, cpu)[0])); ++} ++ ++extern void flush_smp_call_function_from_idle(void); ++ ++#else /* !CONFIG_SMP */ ++static inline void flush_smp_call_function_from_idle(void) { } ++#endif + +#ifndef arch_scale_freq_tick +static __always_inline @@ -7896,7 +7490,7 @@ index 000000000000..6c3361f06087 +static __always_inline +unsigned long arch_scale_freq_capacity(int cpu) +{ -+ return SCHED_CAPACITY_SCALE; ++ return SCHED_CAPACITY_SCALE; +} +#endif + @@ -7925,24 +7519,6 @@ index 000000000000..6c3361f06087 + return rq->clock_task; +} + -+/** -+ * By default the decay is the default pelt decay period. -+ * The decay shift can change the decay period in -+ * multiples of 32. -+ * Decay shift Decay period(ms) -+ * 0 32 -+ * 1 64 -+ * 2 128 -+ * 3 256 -+ * 4 512 -+ */ -+extern int sched_thermal_decay_shift; -+ -+static inline u64 rq_clock_thermal(struct rq *rq) -+{ -+ return rq_clock_task(rq) >> sched_thermal_decay_shift; -+} -+ +/* + * {de,en}queue flags: + * @@ -7959,13 +7535,11 @@ index 000000000000..6c3361f06087 +/* + * Below are scheduler API which using in other kernel code + * It use the dummy rq_flags -+ * ToDo : PDS need to support these APIs for compatibility with mainline ++ * ToDo : BMQ need to support these APIs for compatibility with mainline + * scheduler code. + */ +struct rq_flags { + unsigned long flags; -+ struct pin_cookie cookie; -+ unsigned int clock_update_flags; +}; + +struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) @@ -7981,26 +7555,6 @@ index 000000000000..6c3361f06087 + raw_spin_unlock(&rq->lock); +} + -+static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf) -+{ -+ rf->cookie = lockdep_pin_lock(&rq->lock); -+ -+#ifdef CONFIG_SCHED_DEBUG -+ rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); -+ rf->clock_update_flags = 0; -+#endif -+} -+ -+static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf) -+{ -+#ifdef CONFIG_SCHED_DEBUG -+ if (rq->clock_update_flags > RQCF_ACT_SKIP) -+ rf->clock_update_flags = RQCF_UPDATED; -+#endif -+ -+ lockdep_unpin_lock(&rq->lock, rf->cookie); -+} -+ +static inline void +task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) + __releases(rq->lock) @@ -8011,36 +7565,12 @@ index 000000000000..6c3361f06087 +} + +static inline void -+rq_lock_irqsave(struct rq *rq, struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ raw_spin_lock_irqsave(&rq->lock, rf->flags); -+ rq_pin_lock(rq, rf); -+} -+ -+static inline void -+rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ rq_unpin_lock(rq, rf); -+ raw_spin_unlock_irqrestore(&rq->lock, rf->flags); -+} -+ -+static inline void +rq_unlock_irq(struct rq *rq, struct rq_flags *rf) + __releases(rq->lock) +{ + raw_spin_unlock_irq(&rq->lock); +} + -+static inline void -+rq_unlock(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ rq_unpin_lock(rq, rf); -+ raw_spin_unlock(&rq->lock); -+} -+ +static inline struct rq * +this_rq_lock_irq(struct rq_flags *rf) + __acquires(rq->lock) @@ -8066,8 +7596,6 @@ index 000000000000..6c3361f06087 + +extern struct static_key_false sched_schedstats; + -+extern void flush_smp_call_function_from_idle(void); -+ +#ifdef CONFIG_CPU_IDLE +static inline void idle_set_state(struct rq *rq, + struct cpuidle_state *idle_state) @@ -8103,6 +7631,24 @@ index 000000000000..6c3361f06087 + +#include "stats.h" + ++#ifdef CONFIG_NO_HZ_COMMON ++#define NOHZ_BALANCE_KICK_BIT 0 ++#define NOHZ_STATS_KICK_BIT 1 ++ ++#define 
NOHZ_BALANCE_KICK BIT(NOHZ_BALANCE_KICK_BIT) ++#define NOHZ_STATS_KICK BIT(NOHZ_STATS_KICK_BIT) ++ ++#define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK) ++ ++#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags) ++ ++/* TODO: needed? ++extern void nohz_balance_exit_idle(struct rq *rq); ++#else ++static inline void nohz_balance_exit_idle(struct rq *rq) { } ++*/ ++#endif ++ +#ifdef CONFIG_IRQ_TIME_ACCOUNTING +struct irqtime { + u64 total; @@ -8166,15 +7712,8 @@ index 000000000000..6c3361f06087 + if (data) + data->func(data, rq_clock(rq), flags); +} -+ -+static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) -+{ -+ if (cpu_of(rq) == smp_processor_id()) -+ cpufreq_update_util(rq, flags); -+} +#else +static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} -+static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {} +#endif /* CONFIG_CPU_FREQ */ + +#ifdef CONFIG_NO_HZ_FULL @@ -8249,9 +7788,638 @@ index 000000000000..6c3361f06087 +void swake_up_all_locked(struct swait_queue_head *q); +void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); + -+#endif /* PDS_SCHED_H */ ++#endif /* ALT_SCHED_H */ +diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h +new file mode 100644 +index 000000000000..aff0bb30a884 +--- /dev/null ++++ b/kernel/sched/bmq.h +@@ -0,0 +1,20 @@ ++#ifndef BMQ_H ++#define BMQ_H ++ ++/* bits: ++ * RT(0-99), (Low prio adj range, nice width, high prio adj range) / 2, cpu idle task */ ++#define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH / 2 + MAX_PRIORITY_ADJ + 1) ++#define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) ++ ++struct bmq { ++ DECLARE_BITMAP(bitmap, SCHED_BITS); ++ struct list_head heads[SCHED_BITS]; ++}; ++ ++ ++static inline int task_running_nice(struct task_struct *p) ++{ ++ return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); ++} ++ ++#endif +diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h +new file mode 100644 +index 000000000000..ad9a7c448da7 +--- /dev/null ++++ b/kernel/sched/bmq_imp.h +@@ -0,0 +1,185 @@ ++#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" ++ ++/* ++ * BMQ only routines ++ */ ++#define rq_switch_time(rq) ((rq)->clock - (rq)->last_ts_switch) ++#define boost_threshold(p) (sched_timeslice_ns >>\ ++ (15 - MAX_PRIORITY_ADJ - (p)->boost_prio)) ++ ++static inline void boost_task(struct task_struct *p) ++{ ++ int limit; ++ ++ switch (p->policy) { ++ case SCHED_NORMAL: ++ limit = -MAX_PRIORITY_ADJ; ++ break; ++ case SCHED_BATCH: ++ case SCHED_IDLE: ++ limit = 0; ++ break; ++ default: ++ return; ++ } ++ ++ if (p->boost_prio > limit) ++ p->boost_prio--; ++} ++ ++static inline void deboost_task(struct task_struct *p) ++{ ++ if (p->boost_prio < MAX_PRIORITY_ADJ) ++ p->boost_prio++; ++} ++ ++/* ++ * Common interfaces ++ */ ++static inline int task_sched_prio(struct task_struct *p, struct rq *rq) ++{ ++ return (p->prio < MAX_RT_PRIO)? 
p->prio : MAX_RT_PRIO / 2 + (p->prio + p->boost_prio) / 2; ++} ++ ++static inline void requeue_task(struct task_struct *p, struct rq *rq); ++ ++static inline void time_slice_expired(struct task_struct *p, struct rq *rq) ++{ ++ p->time_slice = sched_timeslice_ns; ++ ++ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) { ++ if (SCHED_RR != p->policy) ++ deboost_task(p); ++ requeue_task(p, rq); ++ } ++} ++ ++static inline void update_task_priodl(struct task_struct *p) {} ++ ++static inline unsigned long sched_queue_watermark(struct rq *rq) ++{ ++ return find_first_bit(rq->queue.bitmap, SCHED_BITS); ++} ++ ++static inline void sched_queue_init(struct rq *rq) ++{ ++ struct bmq *q = &rq->queue; ++ int i; ++ ++ bitmap_zero(q->bitmap, SCHED_BITS); ++ for(i = 0; i < SCHED_BITS; i++) ++ INIT_LIST_HEAD(&q->heads[i]); ++} ++ ++static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) ++{ ++ struct bmq *q = &rq->queue; ++ ++ idle->bmq_idx = IDLE_TASK_SCHED_PRIO; ++ INIT_LIST_HEAD(&q->heads[idle->bmq_idx]); ++ list_add(&idle->bmq_node, &q->heads[idle->bmq_idx]); ++ set_bit(idle->bmq_idx, q->bitmap); ++} ++ ++/* ++ * This routine used in bmq scheduler only which assume the idle task in the bmq ++ */ ++static inline struct task_struct *sched_rq_first_task(struct rq *rq) ++{ ++ unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_BITS); ++ const struct list_head *head = &rq->queue.heads[idx]; ++ ++ return list_first_entry(head, struct task_struct, bmq_node); ++} ++ ++static inline struct task_struct * ++sched_rq_next_task(struct task_struct *p, struct rq *rq) ++{ ++ unsigned long idx = p->bmq_idx; ++ struct list_head *head = &rq->queue.heads[idx]; ++ ++ if (list_is_last(&p->bmq_node, head)) { ++ idx = find_next_bit(rq->queue.bitmap, SCHED_BITS, idx + 1); ++ head = &rq->queue.heads[idx]; ++ ++ return list_first_entry(head, struct task_struct, bmq_node); ++ } ++ ++ return list_next_entry(p, bmq_node); ++} ++ ++#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ ++ psi_dequeue(p, flags & DEQUEUE_SLEEP); \ ++ sched_info_dequeued(rq, p); \ ++ \ ++ list_del(&p->bmq_node); \ ++ if (list_empty(&rq->queue.heads[p->bmq_idx])) { \ ++ clear_bit(p->bmq_idx, rq->queue.bitmap);\ ++ func; \ ++ } ++ ++#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ ++ sched_info_queued(rq, p); \ ++ psi_enqueue(p, flags); \ ++ \ ++ p->bmq_idx = task_sched_prio(p, rq); \ ++ list_add_tail(&p->bmq_node, &rq->queue.heads[p->bmq_idx]); \ ++ set_bit(p->bmq_idx, rq->queue.bitmap) ++ ++#define __SCHED_REQUEUE_TASK(p, rq, func) \ ++{ \ ++ int idx = task_sched_prio(p, rq); \ ++\ ++ list_del(&p->bmq_node); \ ++ list_add_tail(&p->bmq_node, &rq->queue.heads[idx]); \ ++ if (idx != p->bmq_idx) { \ ++ if (list_empty(&rq->queue.heads[p->bmq_idx])) \ ++ clear_bit(p->bmq_idx, rq->queue.bitmap); \ ++ p->bmq_idx = idx; \ ++ set_bit(p->bmq_idx, rq->queue.bitmap); \ ++ func; \ ++ } \ ++} ++ ++static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) ++{ ++ return (task_sched_prio(p, rq) != p->bmq_idx); ++} ++ ++static void sched_task_fork(struct task_struct *p, struct rq *rq) ++{ ++ p->boost_prio = (p->boost_prio < 0) ? ++ p->boost_prio + MAX_PRIORITY_ADJ : MAX_PRIORITY_ADJ; ++} ++ ++/** ++ * task_prio - return the priority value of a given task. ++ * @p: the task in question. ++ * ++ * Return: The priority value as seen by users in /proc. ++ * RT tasks are offset by -100. Normal tasks are centered around 1, value goes ++ * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). 
++ */ ++int task_prio(const struct task_struct *p) ++{ ++ if (p->prio < MAX_RT_PRIO) ++ return (p->prio - MAX_RT_PRIO); ++ return (p->prio - MAX_RT_PRIO + p->boost_prio); ++} ++ ++static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) ++{ ++ p->boost_prio = MAX_PRIORITY_ADJ; ++} ++ ++static void sched_task_ttwu(struct task_struct *p) ++{ ++ if(this_rq()->clock_task - p->last_ran > sched_timeslice_ns) ++ boost_task(p); ++} ++ ++static void sched_task_deactivate(struct task_struct *p, struct rq *rq) ++{ ++ if (rq_switch_time(rq) < boost_threshold(p)) ++ boost_task(p); ++} +diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c +index e39008242cf4..5963716fe391 100644 +--- a/kernel/sched/cpufreq_schedutil.c ++++ b/kernel/sched/cpufreq_schedutil.c +@@ -183,6 +183,7 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, + return cpufreq_driver_resolve_freq(policy, freq); + } + ++#ifndef CONFIG_SCHED_ALT + /* + * This function computes an effective utilization for the given CPU, to be + * used for frequency selection given the linear relation: f = u * f_max. +@@ -300,6 +301,13 @@ static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) + + return schedutil_cpu_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL, NULL); + } ++#else /* CONFIG_SCHED_ALT */ ++static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) ++{ ++ sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu); ++ return sg_cpu->max; ++} ++#endif + + /** + * sugov_iowait_reset() - Reset the IO boost status of a CPU. +@@ -443,7 +451,9 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } + */ + static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy) + { ++#ifndef CONFIG_SCHED_ALT + if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl) ++#endif + sg_policy->limits_changed = true; + } + +@@ -686,6 +696,7 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) + } + + ret = sched_setattr_nocheck(thread, &attr); ++ + if (ret) { + kthread_stop(thread); + pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__); +@@ -912,6 +923,7 @@ struct cpufreq_governor *cpufreq_default_governor(void) + cpufreq_governor_init(schedutil_gov); + + #ifdef CONFIG_ENERGY_MODEL ++#ifndef CONFIG_SCHED_ALT + extern bool sched_energy_update; + extern struct mutex sched_energy_mutex; + +@@ -942,4 +954,10 @@ void sched_cpufreq_governor_change(struct cpufreq_policy *policy, + } + + } ++#else /* CONFIG_SCHED_ALT */ ++void sched_cpufreq_governor_change(struct cpufreq_policy *policy, ++ struct cpufreq_governor *old_gov) ++{ ++} ++#endif + #endif +diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c +index 5a55d2300452..66a0ab7165f0 100644 +--- a/kernel/sched/cputime.c ++++ b/kernel/sched/cputime.c +@@ -122,7 +122,7 @@ void account_user_time(struct task_struct *p, u64 cputime) + p->utime += cputime; + account_group_user_time(p, cputime); + +- index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; ++ index = task_running_nice(p) ? CPUTIME_NICE : CPUTIME_USER; + + /* Add user time to cpustat. */ + task_group_account_field(p, index, cputime); +@@ -146,7 +146,7 @@ void account_guest_time(struct task_struct *p, u64 cputime) + p->gtime += cputime; + + /* Add guest time to cpustat. 
*/ +- if (task_nice(p) > 0) { ++ if (task_running_nice(p)) { + cpustat[CPUTIME_NICE] += cputime; + cpustat[CPUTIME_GUEST_NICE] += cputime; + } else { +@@ -269,7 +269,7 @@ static inline u64 account_other_time(u64 max) + #ifdef CONFIG_64BIT + static inline u64 read_sum_exec_runtime(struct task_struct *t) + { +- return t->se.sum_exec_runtime; ++ return tsk_seruntime(t); + } + #else + static u64 read_sum_exec_runtime(struct task_struct *t) +@@ -279,7 +279,7 @@ static u64 read_sum_exec_runtime(struct task_struct *t) + struct rq *rq; + + rq = task_rq_lock(t, &rf); +- ns = t->se.sum_exec_runtime; ++ ns = tsk_seruntime(t); + task_rq_unlock(rq, t, &rf); + + return ns; +@@ -614,7 +614,7 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, + void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) + { + struct task_cputime cputime = { +- .sum_exec_runtime = p->se.sum_exec_runtime, ++ .sum_exec_runtime = tsk_seruntime(p), + }; + + task_cputime(p, &cputime.utime, &cputime.stime); +diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c +index f324dc36fc43..a6b566bda65b 100644 +--- a/kernel/sched/idle.c ++++ b/kernel/sched/idle.c +@@ -369,6 +369,7 @@ void cpu_startup_entry(enum cpuhp_state state) + do_idle(); + } + ++#ifndef CONFIG_SCHED_ALT + /* + * idle-task scheduling class. + */ +@@ -482,3 +483,4 @@ const struct sched_class idle_sched_class + .switched_to = switched_to_idle, + .update_curr = update_curr_idle, + }; ++#endif +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +new file mode 100644 +index 000000000000..7fdeace7e8a5 +--- /dev/null ++++ b/kernel/sched/pds.h +@@ -0,0 +1,14 @@ ++#ifndef PDS_H ++#define PDS_H ++ ++/* bits: ++ * RT(0-99), (Low prio adj range, nice width, high prio adj range) / 2, cpu idle task */ ++#define SCHED_BITS (MAX_RT_PRIO + 20 + 1) ++#define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) ++ ++static inline int task_running_nice(struct task_struct *p) ++{ ++ return (p->prio > DEFAULT_PRIO); ++} ++ ++#endif +diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h +new file mode 100644 +index 000000000000..6baee5e961b9 +--- /dev/null ++++ b/kernel/sched/pds_imp.h +@@ -0,0 +1,257 @@ ++#define ALT_SCHED_VERSION_MSG "sched/pds: PDS CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" ++ ++static const u64 user_prio2deadline[NICE_WIDTH] = { ++/* -20 */ 4194304, 4613734, 5075107, 5582617, 6140878, ++/* -15 */ 6754965, 7430461, 8173507, 8990857, 9889942, ++/* -10 */ 10878936, 11966829, 13163511, 14479862, 15927848, ++/* -5 */ 17520632, 19272695, 21199964, 23319960, 25651956, ++/* 0 */ 28217151, 31038866, 34142752, 37557027, 41312729, ++/* 5 */ 45444001, 49988401, 54987241, 60485965, 66534561, ++/* 10 */ 73188017, 80506818, 88557499, 97413248, 107154572, ++/* 15 */ 117870029, 129657031, 142622734, 156885007, 172573507 ++}; ++ ++static const unsigned char dl_level_map[] = { ++/* 0 4 8 12 */ ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, ++/* 16 20 24 28 */ ++ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 17, 17, 17, 17, 17, ++/* 32 36 40 44 */ ++ 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, ++/* 48 52 56 60 */ ++ 15, 15, 15, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12, ++/* 64 68 72 76 */ ++ 12, 11, 11, 11, 10, 10, 10, 9, 9, 8, 7, 6, 5, 4, 3, 2, ++/* 80 84 88 92 */ ++ 1, 0 ++}; ++ ++static inline int ++task_sched_prio(const struct task_struct *p, const struct rq *rq) ++{ ++ size_t delta; ++ ++ if (p == rq->idle) ++ return IDLE_TASK_SCHED_PRIO; ++ ++ if (p->prio < MAX_RT_PRIO) ++ return p->prio; ++ ++ 
delta = (rq->clock + user_prio2deadline[39] - p->deadline) >> 21; ++ delta = min((size_t)delta, ARRAY_SIZE(dl_level_map) - 1); ++ ++ return MAX_RT_PRIO + dl_level_map[delta]; ++} ++ ++static inline void update_task_priodl(struct task_struct *p) ++{ ++ p->priodl = (((u64) (p->prio))<<56) | ((p->deadline)>>8); ++} ++ ++static inline void requeue_task(struct task_struct *p, struct rq *rq); ++ ++static inline void time_slice_expired(struct task_struct *p, struct rq *rq) ++{ ++ /*printk(KERN_INFO "sched: time_slice_expired(%d) - %px\n", cpu_of(rq), p);*/ ++ p->time_slice = sched_timeslice_ns; ++ ++ if (p->prio >= MAX_RT_PRIO) ++ p->deadline = rq->clock + user_prio2deadline[TASK_USER_PRIO(p)]; ++ update_task_priodl(p); ++ ++ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) ++ requeue_task(p, rq); ++} ++ ++/* ++ * pds_skiplist_task_search -- search function used in PDS run queue skip list ++ * node insert operation. ++ * @it: iterator pointer to the node in the skip list ++ * @node: pointer to the skiplist_node to be inserted ++ * ++ * Returns true if key of @it is less or equal to key value of @node, otherwise ++ * false. ++ */ ++static inline bool ++pds_skiplist_task_search(struct skiplist_node *it, struct skiplist_node *node) ++{ ++ return (skiplist_entry(it, struct task_struct, sl_node)->priodl <= ++ skiplist_entry(node, struct task_struct, sl_node)->priodl); ++} ++ ++/* ++ * Define the skip list insert function for PDS ++ */ ++DEFINE_SKIPLIST_INSERT_FUNC(pds_skiplist_insert, pds_skiplist_task_search); ++ ++/* ++ * Init the queue structure in rq ++ */ ++static inline void sched_queue_init(struct rq *rq) ++{ ++ FULL_INIT_SKIPLIST_NODE(&rq->sl_header); ++} ++ ++/* ++ * Init idle task and put into queue structure of rq ++ * IMPORTANT: may be called multiple times for a single cpu ++ */ ++static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) ++{ ++ /*printk(KERN_INFO "sched: init(%d) - %px\n", cpu_of(rq), idle);*/ ++ int default_prio = idle->prio; ++ ++ idle->prio = MAX_PRIO; ++ idle->deadline = 0ULL; ++ update_task_priodl(idle); ++ ++ FULL_INIT_SKIPLIST_NODE(&rq->sl_header); ++ ++ idle->sl_node.level = idle->sl_level; ++ pds_skiplist_insert(&rq->sl_header, &idle->sl_node); ++ ++ idle->prio = default_prio; ++} ++ ++/* ++ * This routine assume that the idle task always in queue ++ */ ++static inline struct task_struct *sched_rq_first_task(struct rq *rq) ++{ ++ struct skiplist_node *node = rq->sl_header.next[0]; ++ ++ BUG_ON(node == &rq->sl_header); ++ return skiplist_entry(node, struct task_struct, sl_node); ++} ++ ++static inline struct task_struct * ++sched_rq_next_task(struct task_struct *p, struct rq *rq) ++{ ++ struct skiplist_node *next = p->sl_node.next[0]; ++ ++ BUG_ON(next == &rq->sl_header); ++ return skiplist_entry(next, struct task_struct, sl_node); ++} ++ ++static inline unsigned long sched_queue_watermark(struct rq *rq) ++{ ++ return task_sched_prio(sched_rq_first_task(rq), rq); ++} ++ ++#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ ++ psi_dequeue(p, flags & DEQUEUE_SLEEP); \ ++ sched_info_dequeued(rq, p); \ ++ \ ++ if (skiplist_del_init(&rq->sl_header, &p->sl_node)) { \ ++ func; \ ++ } ++ ++#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ ++ sched_info_queued(rq, p); \ ++ psi_enqueue(p, flags); \ ++ \ ++ p->sl_node.level = p->sl_level; \ ++ pds_skiplist_insert(&rq->sl_header, &p->sl_node) ++ ++/* ++ * Requeue a task @p to @rq ++ */ ++#define __SCHED_REQUEUE_TASK(p, rq, func) \ ++{\ ++ bool b_first = skiplist_del_init(&rq->sl_header, &p->sl_node); \ 
++\ ++ p->sl_node.level = p->sl_level; \ ++ if (pds_skiplist_insert(&rq->sl_header, &p->sl_node) || b_first) { \ ++ func; \ ++ } \ ++} ++ ++static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) ++{ ++ struct skiplist_node *node = p->sl_node.prev[0]; ++ ++ if (node != &rq->sl_header) { ++ struct task_struct *t = skiplist_entry(node, struct task_struct, sl_node); ++ ++ if (t->priodl > p->priodl) ++ return true; ++ } ++ ++ node = p->sl_node.next[0]; ++ if (node != &rq->sl_header) { ++ struct task_struct *t = skiplist_entry(node, struct task_struct, sl_node); ++ ++ if (t->priodl < p->priodl) ++ return true; ++ } ++ ++ return false; ++} ++ ++/* ++ * pds_skiplist_random_level -- Returns a pseudo-random level number for skip ++ * list node which is used in PDS run queue. ++ * ++ * In current implementation, based on testing, the first 8 bits in microseconds ++ * of niffies are suitable for random level population. ++ * find_first_bit() is used to satisfy p = 0.5 between each levels, and there ++ * should be platform hardware supported instruction(known as ctz/clz) to speed ++ * up this function. ++ * The skiplist level for a task is populated when task is created and doesn't ++ * change in task's life time. When task is being inserted into run queue, this ++ * skiplist level is set to task's sl_node->level, the skiplist insert function ++ * may change it based on current level of the skip lsit. ++ */ ++static inline int pds_skiplist_random_level(const struct task_struct *p) ++{ ++ long unsigned int randseed; ++ ++ /* ++ * 1. Some architectures don't have better than microsecond resolution ++ * so mask out ~microseconds as a factor of the random seed for skiplist ++ * insertion. ++ * 2. Use address of task structure pointer as another factor of the ++ * random seed for task burst forking scenario. ++ */ ++ randseed = (task_rq(p)->clock ^ (long unsigned int)p) >> 10; ++ ++ return find_first_bit(&randseed, NUM_SKIPLIST_LEVEL - 1); ++} ++ ++static void sched_task_fork(struct task_struct *p, struct rq *rq) ++{ ++ p->sl_level = pds_skiplist_random_level(p); ++ if (p->prio >= MAX_RT_PRIO) ++ p->deadline = rq->clock + user_prio2deadline[TASK_USER_PRIO(p)]; ++ update_task_priodl(p); ++} ++ ++/** ++ * task_prio - return the priority value of a given task. ++ * @p: the task in question. ++ * ++ * Return: The priority value as seen by users in /proc. ++ * RT tasks are offset by -100. Normal tasks are centered around 1, value goes ++ * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). 
++ */ ++int task_prio(const struct task_struct *p) ++{ ++ int ret; ++ ++ if (p->prio < MAX_RT_PRIO) ++ return (p->prio - MAX_RT_PRIO); ++ ++ preempt_disable(); ++ ret = task_sched_prio(p, this_rq()) - MAX_RT_PRIO; ++ preempt_enable(); ++ ++ return ret; ++} ++ ++static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) ++{ ++ time_slice_expired(p, rq); ++} ++ ++static void sched_task_ttwu(struct task_struct *p) {} ++static void sched_task_deactivate(struct task_struct *p, struct rq *rq) {} diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c -index b647d04d9c8b..05b6cfd91842 100644 +index 2c613e1cff3a..0103b2a7201d 100644 --- a/kernel/sched/pelt.c +++ b/kernel/sched/pelt.c @@ -83,6 +83,8 @@ static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3) @@ -8263,22 +8431,25 @@ index b647d04d9c8b..05b6cfd91842 100644 /* * Accumulate the three separate parts of the sum; d1 the remainder * of the last (incomplete) period, d2 the span of full periods and d3 -@@ -250,6 +250,7 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load) +@@ -270,6 +270,7 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load) WRITE_ONCE(sa->util_avg, sa->util_sum / divider); } -+#ifndef CONFIG_SCHED_PDS ++#ifndef CONFIG_SCHED_ALT /* * sched_entity: * -@@ -367,6 +368,7 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) +@@ -387,8 +388,9 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) return 0; } +#endif - #ifdef CONFIG_SCHED_THERMAL_PRESSURE +-#ifdef CONFIG_SCHED_THERMAL_PRESSURE ++#if defined(CONFIG_SCHED_THERMAL_PRESSURE) && !defined(CONFIG_SCHED_ALT) /* + * thermal: + * @@ -388,10 +393,8 @@ int update_irq_load_avg(struct rq *rq, u64 running) 1, 1); @@ -8292,14 +8463,14 @@ index b647d04d9c8b..05b6cfd91842 100644 return ret; } diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h -index eb034d9f024d..a074572f2976 100644 +index 795e43e02afc..856163dac896 100644 --- a/kernel/sched/pelt.h +++ b/kernel/sched/pelt.h -@@ -1,11 +1,13 @@ +@@ -1,13 +1,15 @@ #ifdef CONFIG_SMP #include "sched-pelt.h" -+#ifndef CONFIG_SCHED_PDS ++#ifndef CONFIG_SCHED_ALT int __update_load_avg_blocked_se(u64 now, struct sched_entity *se); int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se); int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq); @@ -8307,66 +8478,73 @@ index eb034d9f024d..a074572f2976 100644 int update_dl_rq_load_avg(u64 now, struct rq *rq, int running); +#endif - #ifdef CONFIG_SCHED_THERMAL_PRESSURE +-#ifdef CONFIG_SCHED_THERMAL_PRESSURE ++#if defined(CONFIG_SCHED_THERMAL_PRESSURE) && !defined(CONFIG_SCHED_ALT) int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity); -@@ -37,6 +39,7 @@ update_irq_load_avg(struct rq *rq, u64 running) - } - #endif -+#ifndef CONFIG_SCHED_PDS + static inline u64 thermal_load_avg(struct rq *rq) +@@ -42,6 +44,7 @@ static inline u32 get_pelt_divider(struct sched_avg *avg) + return LOAD_AVG_MAX - 1024 + avg->period_contrib; + } + ++#ifndef CONFIG_SCHED_ALT /* * When a task is dequeued, its estimated utilization should not be update if * its util_avg has not been updated at least once. 
-@@ -157,9 +160,11 @@ static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) +@@ -162,9 +165,11 @@ static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) return rq_clock_pelt(rq_of(cfs_rq)); } #endif -+#endif /* CONFIG_SCHED_PDS */ ++#endif /* CONFIG_SCHED_ALT */ #else -+#ifndef CONFIG_SCHED_PDS ++#ifndef CONFIG_SCHED_ALT static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) { -@@ -188,6 +193,7 @@ static inline u64 thermal_load_avg(struct rq *rq) +@@ -182,6 +187,7 @@ update_dl_rq_load_avg(u64 now, struct rq *rq, int running) { return 0; } +#endif static inline int - update_irq_load_avg(struct rq *rq, u64 running) + update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index db3a57675ccf..5a8060bd2343 100644 +index 28709f6b0975..6bc68bacbac8 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2,6 +2,10 @@ /* * Scheduler internal types and methods: */ -+#ifdef CONFIG_SCHED_PDS -+#include "pds_sched.h" ++#ifdef CONFIG_SCHED_ALT ++#include "alt_sched.h" +#else + #include #include -@@ -2546,3 +2550,5 @@ static inline bool is_per_cpu_kthread(struct task_struct *p) +@@ -2626,3 +2630,9 @@ static inline bool is_per_cpu_kthread(struct task_struct *p) void swake_up_all_locked(struct swait_queue_head *q); void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); + -+#endif /* !CONFIG_SCHED_PDS */ ++static inline int task_running_nice(struct task_struct *p) ++{ ++ return (task_nice(p) > 0); ++} ++#endif /* !CONFIG_SCHED_ALT */ diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c -index 750fb3c67eed..45bd43942575 100644 +index 750fb3c67eed..108422ebc7bf 100644 --- a/kernel/sched/stats.c +++ b/kernel/sched/stats.c @@ -22,8 +22,10 @@ static int show_schedstat(struct seq_file *seq, void *v) } else { struct rq *rq; #ifdef CONFIG_SMP -+#ifndef CONFIG_SCHED_PDS ++#ifndef CONFIG_SCHED_ALT struct sched_domain *sd; int dcount = 0; +#endif @@ -8377,7 +8555,7 @@ index 750fb3c67eed..45bd43942575 100644 seq_printf(seq, "\n"); #ifdef CONFIG_SMP -+#ifndef CONFIG_SCHED_PDS ++#ifndef CONFIG_SCHED_ALT /* domain-specific stats */ rcu_read_lock(); for_each_domain(cpu, sd) { @@ -8389,84 +8567,141 @@ index 750fb3c67eed..45bd43942575 100644 #endif } return 0; +diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c +index 1bd7e3af904f..cc946a9bd550 100644 +--- a/kernel/sched/topology.c ++++ b/kernel/sched/topology.c +@@ -4,6 +4,7 @@ + */ + #include "sched.h" + ++#ifndef CONFIG_SCHED_ALT + DEFINE_MUTEX(sched_domains_mutex); + + /* Protected by sched_domains_mutex: */ +@@ -1180,8 +1181,10 @@ static void init_sched_groups_capacity(int cpu, struct sched_domain *sd) + */ + + static int default_relax_domain_level = -1; ++#endif /* CONFIG_SCHED_ALT */ + int sched_domain_level_max; + ++#ifndef CONFIG_SCHED_ALT + static int __init setup_relax_domain_level(char *str) + { + if (kstrtoint(str, 0, &default_relax_domain_level)) +@@ -1413,6 +1416,7 @@ sd_init(struct sched_domain_topology_level *tl, + + return sd; + } ++#endif /* CONFIG_SCHED_ALT */ + + /* + * Topology list, bottom-up. 
+@@ -1442,6 +1446,7 @@ void set_sched_topology(struct sched_domain_topology_level *tl) + sched_domain_topology = tl; + } + ++#ifndef CONFIG_SCHED_ALT + #ifdef CONFIG_NUMA + + static const struct cpumask *sd_numa_mask(int cpu) +@@ -2316,3 +2321,17 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], + partition_sched_domains_locked(ndoms_new, doms_new, dattr_new); + mutex_unlock(&sched_domains_mutex); + } ++#else /* CONFIG_SCHED_ALT */ ++void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], ++ struct sched_domain_attr *dattr_new) ++{} ++ ++#ifdef CONFIG_NUMA ++int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; ++ ++int sched_numa_find_closest(const struct cpumask *cpus, int cpu) ++{ ++ return best_mask_cpu(cpu, cpus); ++} ++#endif /* CONFIG_NUMA */ ++#endif diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index 8a176d8727a3..b9dde576b576 100644 +index afad085960b8..e91b4cb3042b 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c -@@ -130,9 +130,13 @@ static int __maybe_unused four = 4; - static unsigned long zero_ul; - static unsigned long one_ul = 1; - static unsigned long long_max = LONG_MAX; --static int one_hundred = 100; --static int two_hundred = 200; --static int one_thousand = 1000; -+static int __read_mostly one_hundred = 100; -+static int __read_mostly two_hundred = 200; -+static int __read_mostly one_thousand = 1000; -+#ifdef CONFIG_SCHED_PDS -+extern int rr_interval; +@@ -120,6 +120,10 @@ static unsigned long long_max = LONG_MAX; + static int one_hundred = 100; + static int two_hundred = 200; + static int one_thousand = 1000; ++#ifdef CONFIG_SCHED_ALT ++static int __maybe_unused zero = 0; +extern int sched_yield_type; +#endif #ifdef CONFIG_PRINTK static int ten_thousand = 10000; #endif -@@ -288,7 +292,7 @@ static struct ctl_table sysctl_base_table[] = { - { } - }; +@@ -184,7 +188,7 @@ static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT; + int sysctl_legacy_va_layout; + #endif -#ifdef CONFIG_SCHED_DEBUG -+#if defined(CONFIG_SCHED_DEBUG) && !defined(CONFIG_SCHED_PDS) ++#if defined(CONFIG_SCHED_DEBUG) && !defined(CONFIG_SCHED_ALT) static int min_sched_granularity_ns = 100000; /* 100 usecs */ static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ static int min_wakeup_granularity_ns; /* 0 usecs */ -@@ -305,6 +309,7 @@ static int max_extfrag_threshold = 1000; - #endif +@@ -1652,6 +1656,7 @@ int proc_do_static_key(struct ctl_table *table, int write, + } static struct ctl_table kern_table[] = { -+#ifndef CONFIG_SCHED_PDS ++#ifndef CONFIG_SCHED_ALT { .procname = "sched_child_runs_first", .data = &sysctl_sched_child_runs_first, -@@ -486,6 +491,7 @@ static struct ctl_table kern_table[] = { +@@ -1854,6 +1859,7 @@ static struct ctl_table kern_table[] = { .extra2 = SYSCTL_ONE, }, #endif -+#endif /* !CONFIG_SCHED_PDS */ ++#endif /* !CONFIG_SCHED_ALT */ #ifdef CONFIG_PROVE_LOCKING { .procname = "prove_locking", -@@ -1049,6 +1055,26 @@ static struct ctl_table kern_table[] = { +@@ -2430,6 +2436,17 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif -+#ifdef CONFIG_SCHED_PDS -+ { -+ .procname = "rr_interval", -+ .data = &rr_interval, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = SYSCTL_ONE, -+ .extra2 = &one_thousand, -+ }, ++#ifdef CONFIG_SCHED_ALT + { + .procname = "yield_type", + .data = &sched_yield_type, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, -+ .extra1 = SYSCTL_ZERO, ++ .extra1 = &zero, 
+ .extra2 = &two, + }, +#endif #if defined(CONFIG_S390) && defined(CONFIG_SMP) { .procname = "spin_retry", +diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c +index 95b6a708b040..81f2ee62c807 100644 +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -1927,8 +1927,10 @@ long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, + int ret = 0; + u64 slack; + ++#ifndef CONFIG_SCHED_ALT + slack = current->timer_slack_ns; + if (dl_task(current) || rt_task(current)) ++#endif + slack = 0; + + hrtimer_init_sleeper_on_stack(&t, clockid, mode); diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c -index 2fd3b3fa68bf..6f3b08afdd4c 100644 +index a71758e34e45..d20c347df861 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c -@@ -236,7 +236,7 @@ static void task_sample_cputime(struct task_struct *p, u64 *samples) +@@ -216,7 +216,7 @@ static void task_sample_cputime(struct task_struct *p, u64 *samples) u64 stime, utime; task_cputime(p, &utime, &stime); @@ -8475,15 +8710,15 @@ index 2fd3b3fa68bf..6f3b08afdd4c 100644 } static void proc_sample_cputime_atomic(struct task_cputime_atomic *at, -@@ -806,6 +806,7 @@ static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples, +@@ -801,6 +801,7 @@ static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples, } } -+#ifndef CONFIG_SCHED_PDS ++#ifndef CONFIG_SCHED_ALT static inline void check_dl_overrun(struct task_struct *tsk) { if (tsk->dl.dl_overrun) { -@@ -813,6 +814,7 @@ static inline void check_dl_overrun(struct task_struct *tsk) +@@ -808,6 +809,7 @@ static inline void check_dl_overrun(struct task_struct *tsk) __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); } } @@ -8491,18 +8726,18 @@ index 2fd3b3fa68bf..6f3b08afdd4c 100644 static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard) { -@@ -840,8 +842,10 @@ static void check_thread_timers(struct task_struct *tsk, +@@ -835,8 +837,10 @@ static void check_thread_timers(struct task_struct *tsk, u64 samples[CPUCLOCK_MAX]; unsigned long soft; -+#ifndef CONFIG_SCHED_PDS ++#ifndef CONFIG_SCHED_ALT if (dl_task(tsk)) check_dl_overrun(tsk); +#endif if (expiry_cache_is_inactive(pct)) return; -@@ -855,7 +859,7 @@ static void check_thread_timers(struct task_struct *tsk, +@@ -850,7 +854,7 @@ static void check_thread_timers(struct task_struct *tsk, soft = task_rlimit(tsk, RLIMIT_RTTIME); if (soft != RLIM_INFINITY) { /* Task RT timeout is accounted in jiffies. RTTIME is usec */ @@ -8511,11 +8746,11 @@ index 2fd3b3fa68bf..6f3b08afdd4c 100644 unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); /* At the hard limit, send SIGKILL. No further action. 
*/ -@@ -1091,8 +1095,10 @@ static inline bool fastpath_timer_check(struct task_struct *tsk) +@@ -1086,8 +1090,10 @@ static inline bool fastpath_timer_check(struct task_struct *tsk) return true; } -+#ifndef CONFIG_SCHED_PDS ++#ifndef CONFIG_SCHED_ALT if (dl_task(tsk) && tsk->dl.dl_overrun) return true; +#endif @@ -8523,15 +8758,15 @@ index 2fd3b3fa68bf..6f3b08afdd4c 100644 return false; } diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c -index b5e3496cf803..0816db0b9c16 100644 +index b5e3496cf803..65f60c77bc50 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -1048,10 +1048,15 @@ static int trace_wakeup_test_thread(void *data) { /* Make this a -deadline thread */ static const struct sched_attr attr = { -+#ifdef CONFIG_SCHED_PDS -+ /* No deadline on BFS, use RR */ ++#ifdef CONFIG_SCHED_ALT ++ /* No deadline on BMQ/PDS, use RR */ + .sched_policy = SCHED_RR, +#else .sched_policy = SCHED_DEADLINE, diff --git a/linux59-rc-tkg/linux59-tkg-patches/0011-ZFS-fix.patch b/linux59-tkg/linux59-tkg-patches/0011-ZFS-fix.patch similarity index 100% rename from linux59-rc-tkg/linux59-tkg-patches/0011-ZFS-fix.patch rename to linux59-tkg/linux59-tkg-patches/0011-ZFS-fix.patch diff --git a/linux59-rc-tkg/linux59-tkg-patches/0012-misc-additions.patch b/linux59-tkg/linux59-tkg-patches/0012-misc-additions.patch similarity index 100% rename from linux59-rc-tkg/linux59-tkg-patches/0012-misc-additions.patch rename to linux59-tkg/linux59-tkg-patches/0012-misc-additions.patch
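
A quick aside on the skip list trick used by pds_skiplist_random_level() in the Project C patch above: the seed is the run queue clock XORed with the task_struct address, shifted right by 10 bits to drop sub-microsecond noise, and the index of the lowest set bit becomes the node level, which gives the p = 0.5 step between adjacent levels mentioned in the in-patch comment. The userspace sketch below is only an illustration of that idea, not scheduler code; NUM_SKIPLIST_LEVEL, the clock values and the addresses are invented for the demo.

/* demo_skiplist_level.c - standalone sketch of the level picker. */
#include <stdio.h>
#include <stdlib.h>

#define NUM_SKIPLIST_LEVEL 8            /* assumed for the demo only */

static int demo_random_level(unsigned long clock_ns, unsigned long task_addr)
{
        /* Mix clock and address, drop ~1us worth of low-order bits. */
        unsigned long randseed = (clock_ns ^ task_addr) >> 10;

        if (!randseed)
                return NUM_SKIPLIST_LEVEL - 1;

        /* __builtin_ctzl() stands in for find_first_bit(): index of the
         * lowest set bit, capped at the deepest level. */
        int level = __builtin_ctzl(randseed);

        return level < NUM_SKIPLIST_LEVEL - 1 ? level : NUM_SKIPLIST_LEVEL - 1;
}

int main(void)
{
        int histogram[NUM_SKIPLIST_LEVEL] = { 0 };
        unsigned long clock_ns = 0;

        srand(1);
        for (unsigned long i = 0; i < (1ul << 20); i++) {
                /* Simulated rq clock with jitter, and a made-up task
                 * address advancing by a slab-like stride. */
                clock_ns += 2048 + (unsigned long)(rand() % 4096);
                unsigned long task_addr = 0xc0de0000ul + i * 0x2400ul;

                histogram[demo_random_level(clock_ns, task_addr)]++;
        }

        /* Each level should show up roughly half as often as the one
         * before it; the last bucket also absorbs the tail. */
        for (int l = 0; l < NUM_SKIPLIST_LEVEL; l++)
                printf("level %d: %d\n", l, histogram[l]);

        return 0;
}

Built with something like gcc -O2 -o demo demo_skiplist_level.c, the printed histogram should roughly halve from one level to the next, which is the geometric distribution the scheduler relies on for balanced skip lists.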
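
On the sysctl side, the only knob the kern_table hunk adds under CONFIG_SCHED_ALT is "yield_type", bounded to 0..2 through the zero/two extra values, so it surfaces as /proc/sys/kernel/yield_type. A minimal reader, assuming a kernel actually built with the alternative scheduler (on any other kernel the file simply does not exist):

/* read_yield_type.c - tiny sketch, assumes CONFIG_SCHED_ALT is enabled. */
#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/sys/kernel/yield_type", "r");
        int val;

        if (!f) {
                perror("fopen /proc/sys/kernel/yield_type");
                return 1;       /* most likely not a BMQ/PDS kernel */
        }
        if (fscanf(f, "%d", &val) == 1)
                printf("kernel.yield_type = %d (valid range 0-2)\n", val);
        fclose(f);
        return 0;
}

Writing works the usual way, e.g. sysctl -w kernel.yield_type=2; values outside 0-2 are rejected by the proc_dointvec_minmax handler rather than clamped.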